/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 150;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 50;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;

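/*
 * RANGEOF() yields the size in bytes of the span of struct members
 * [start, end); e.g. bzero(&kg->kg_startzero,
 * RANGEOF(struct ksegrp, kg_startzero, kg_endzero)) clears exactly
 * the members laid out between the two marker fields, as done in
 * kse_create() below.
 */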
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
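/*
 * Note: kse_zombie_lock is a spin mutex, presumably because the stash
 * routines below can be entered with sched_lock (itself a spin lock)
 * held, e.g. from thread_exit(), where taking a sleep mutex would be
 * illegal.
 */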

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
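/*
 * For example, from userland:
 *
 *	# sysctl kern.threads.virtual_cpu=4
 *
 * overrides the cpu count used when sizing KSEs and upcalls per
 * ksegrp in kse_create() below (only honored while the
 * kern.threads.debug knob is nonzero).
 */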

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	vm_thread_new(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
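	/*
	 * The zone was created with sched_sizeof_thread() bytes per
	 * item (see threadinit()), so the scheduler-private data
	 * lives immediately after the struct thread itself.
	 */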
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its ksegrp.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	if (--kg->kg_kses == 0)
		ksegrp_unlink(kg);
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the ksegrp.
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = NULL;
	}
}

/*
 * For a newly created process, link up all the structures
 * and its initial thread, etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox * tmbx;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	if (!(p->p_flag & P_THREADED) || (uap->tmbx == NULL))
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
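	/*
	 * Find the thread bound to the given mailbox and post
	 * TDF_INTERRUPT to it; if it is in an interruptible sleep,
	 * abort the sleep so the flag is noticed promptly.
	 */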
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int    error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
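	/*
	 * Count upcalls that are already on their way out; if this
	 * would leave the group with a single live upcall while more
	 * than one thread remains, exiting now would strand the other
	 * threads, so refuse with EDEADLK.
	 */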
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_THREADED;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct timespec ts, ts2, ts3, timeout;
	struct timeval tv;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if (td->td_upcall == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		getnanouptime(&ts);
		timespecadd(&ts, &timeout);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
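	/*
	 * When a timeout is given, "ts" above is the absolute uptime
	 * deadline; each time msleep() returns EWOULDBLOCK below, the
	 * remaining time (deadline minus now) is recomputed before
	 * sleeping again.
	 */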
	mtx_lock_spin(&sched_lock);
	/* Change OURSELF to become an upcall. */
	td->td_flags = TDF_UPCALLING;
#if 0	/* XXX This shouldn't be necessary */
	if (p->p_sflag & PS_NEEDSIGCHK)
		td->td_flags |= TDF_ASTPENDING;
#endif
	mtx_unlock_spin(&sched_lock);
	PROC_LOCK(p);
	while ((td->td_upcall->ku_flags & KUF_DOUPCALL) == 0 &&
	       (kg->kg_completed == NULL)) {
		kg->kg_upsleeps++;
		error = msleep(&kg->kg_completed, &p->p_mtx, PPAUSE|PCATCH,
			"kse_rel", (uap->timeout ? tvtohz(&tv) : 0));
		kg->kg_upsleeps--;
		PROC_UNLOCK(p);
		if (uap->timeout == NULL || error != EWOULDBLOCK)
			return (0);
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=))
			return (0);
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		PROC_LOCK(p);
	}
	PROC_UNLOCK(p);
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_THREADED))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
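	/*
	 * With an explicit mailbox, search every ksegrp in the process
	 * for the matching upcall.  Otherwise prefer waking a sleeper
	 * in our own ksegrp, falling back to flagging its first
	 * upcall.
	 */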
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) &&
		           (td2->td_wchan == &kg->kg_completed)) {
			abortsleep(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * No new KSEGRP: on the first call, use the current KSE and don't
 * schedule an upcall.  In all other situations, allocate max new
 * KSEs and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't always have a cpu count available. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (thread_debug && virtual_cpu != 0)
		ncpus = virtual_cpu;

	/* Easier to just set it than to test and set */
	PROC_LOCK(p);
	p->p_flag |= P_THREADED;
	PROC_UNLOCK(p);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race here, but it is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		mtx_unlock_spin(&sched_lock);
	} else {
		newkg = kg;
	}

	/*
	 * Creating more upcalls than the number of physical cpus does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);
	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group, optimized for MP.
		 * Create as many KSEs as there are physical cpus; this
		 * improves concurrency even when userland is not MP
		 * safe and can only run on a single CPU (true for
		 * early versions of libpthread).  In an ideal world
		 * every physical cpu should execute a thread.  Given
		 * enough KSEs, threads in the kernel can run in
		 * parallel on different cpus at full speed; kernel
		 * concurrency shouldn't be restricted by the number
		 * of upcalls userland provides.  Adding more upcall
		 * structures only increases concurrency in userland.
		 * The highest performance configuration is:
		 * N kses = N upcalls = N physical cpus.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			      ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	mtx_lock_spin(&sched_lock);
	if (newkg->kg_numupcalls >= ncpus) {
		mtx_unlock_spin(&sched_lock);
		upcall_free(newku);
		return (EPROCLIM);
	}
	upcall_link(newku, newkg);
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread, find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread yet, create an
		 * initial upcall thread to own it.
		 */
		thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread has no upcall structure, just
		 * assign this one to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			thread_schedule_upcall(td, newku);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */
void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

86344990b8cSJulian Elischer /*
8644f0db5e0SJulian Elischer  * Allocate a ksegrp.
8654f0db5e0SJulian Elischer  */
8664f0db5e0SJulian Elischer struct ksegrp *
8674f0db5e0SJulian Elischer ksegrp_alloc(void)
8684f0db5e0SJulian Elischer {
869a163d034SWarner Losh 	return (uma_zalloc(ksegrp_zone, M_WAITOK));
8704f0db5e0SJulian Elischer }
8714f0db5e0SJulian Elischer 
/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building
 * in user space.  The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error, temp;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Export the clock ticks spent in kernel mode. */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp)) {
		error = EFAULT;
		goto bad;
	}

	/* Get the address of the list pointer in the latest mailbox. */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one; retry if the list head
	 * changes underneath us.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads when we unlock the
			 * process lock, so it is no longer valid to
			 * use it anywhere else.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	/* The mailbox is bad, don't use it */
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (error);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * upcall's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

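	/*
	 * Hang the group's completed-mailbox list off km_completed in
	 * the upcall's mailbox; retry the user-space store if another
	 * thread changes kg_completed while we are writing it out.
	 */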
	addr = (void *)(&ku->ku_mailbox->km_completed);
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time
 */
int
thread_statclock(int user)
{
	struct thread *td = curthread;

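	/*
	 * Ticks are only accumulated here (td_uuticks/td_usticks);
	 * thread_update_usr_ticks() later exports them to the
	 * thread's user-space mailbox.
	 */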
	if (td->td_ksegrp->kg_numupcalls == 0)
		return (-1);
	if (user) {
		/* Currently always done via ast(). */
		mtx_lock_spin(&sched_lock);
		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
		mtx_unlock_spin(&sched_lock);
		td->td_uuticks++;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks++;
		else {
			/* XXXKSE
			 * We will call thread_user_enter() for every
			 * kernel entry in the future, so if the thread
			 * mailbox is NULL it must be a UTS (userland
			 * thread scheduler) thread; don't account
			 * clock ticks for it.
			 */
		}
	}
	return (0);
}

/*
 * Export stat clock ticks to userland.
 */
static int
thread_update_usr_ticks(struct thread *td, int user)
{
	struct proc *p = td->td_proc;
	struct kse_thr_mailbox *tmbx;
	struct kse_upcall *ku;
	struct ksegrp *kg;
	caddr_t addr;
	uint uticks;

	if ((ku = td->td_upcall) == NULL)
		return (-1);

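	/*
	 * km_curthread is fetched from user space; NULL, or -1 from a
	 * faulting fuword(), means there is no valid thread mailbox
	 * to account against.
	 */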
	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (-1);
	if (user) {
		uticks = td->td_uuticks;
		td->td_uuticks = 0;
		addr = (caddr_t)&tmbx->tm_uticks;
	} else {
		uticks = td->td_usticks;
		td->td_usticks = 0;
		addr = (caddr_t)&tmbx->tm_sticks;
	}
	if (uticks) {
		if (suword(addr, uticks+fuword(addr))) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (-2);
		}
	}
	kg = td->td_ksegrp;
	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
		mtx_lock_spin(&sched_lock);
		td->td_upcall->ku_flags |= KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

112948bfcdddSJulian Elischer 	if (td->td_standin != NULL) {
113048bfcdddSJulian Elischer 		thread_stash(td->td_standin);
113148bfcdddSJulian Elischer 		td->td_standin = NULL;
113248bfcdddSJulian Elischer 	}
113348bfcdddSJulian Elischer 
113444990b8cSJulian Elischer 	cpu_thread_exit(td);	/* XXXSMP */
113544990b8cSJulian Elischer 
11361faf202eSJulian Elischer 	/*
11371faf202eSJulian Elischer 	 * The last thread is left attached to the process
11381faf202eSJulian Elischer 	 * So that the whole bundle gets recycled. Skip
11391faf202eSJulian Elischer 	 * all this stuff.
11401faf202eSJulian Elischer 	 */
11411faf202eSJulian Elischer 	if (p->p_numthreads > 1) {
1142d3a0bd78SJulian Elischer 		thread_unlink(td);
11430252d203SDavid Xu 		if (p->p_maxthrwaits)
11440252d203SDavid Xu 			wakeup(&p->p_numthreads);
114544990b8cSJulian Elischer 		/*
114644990b8cSJulian Elischer 		 * The test below is NOT true if we are the
11471faf202eSJulian Elischer 		 * sole exiting thread. P_STOPPED_SINGLE is unset
114844990b8cSJulian Elischer 		 * in exit1() once it is the only survivor.
114944990b8cSJulian Elischer 		 */
11501279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
115144990b8cSJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
115271fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
115344990b8cSJulian Elischer 			}
115444990b8cSJulian Elischer 		}
115548bfcdddSJulian Elischer 
11565215b187SJeff Roberson 		/*
11575215b187SJeff Roberson 		 * Because each upcall structure has an owner thread,
11585215b187SJeff Roberson 		 * and the owner thread exits only when the process
11595215b187SJeff Roberson 		 * is exiting, no further upcall to userland is needed
11605215b187SJeff Roberson 		 * and it is safe to delete the upcall structure here.
11615215b187SJeff Roberson 		 * Thus, once all threads in a group have exited, all
11625215b187SJeff Roberson 		 * upcalls in the group have been freed as well.
11635215b187SJeff Roberson 		 */
11645215b187SJeff Roberson 		if (td->td_upcall)
11655215b187SJeff Roberson 			upcall_remove(td);
11666f8132a8SJulian Elischer 
11675215b187SJeff Roberson 		ke->ke_state = KES_UNQUEUED;
11685215b187SJeff Roberson 		ke->ke_thread = NULL;
116948bfcdddSJulian Elischer 		/*
117093a7aa79SJulian Elischer 		 * Decide what to do with the KSE attached to this thread.
117148bfcdddSJulian Elischer 		 */
11725215b187SJeff Roberson 		if (ke->ke_flags & KEF_EXIT)
11736f8132a8SJulian Elischer 			kse_unlink(ke);
11745215b187SJeff Roberson 		else
11756f8132a8SJulian Elischer 			kse_reassign(ke);
11766f8132a8SJulian Elischer 		PROC_UNLOCK(p);
11775215b187SJeff Roberson 		td->td_kse	= NULL;
11785c8329edSJulian Elischer 		td->td_state	= TDS_INACTIVE;
117936f7b36fSDavid Xu #if 0
11805c8329edSJulian Elischer 		td->td_proc	= NULL;
118136f7b36fSDavid Xu #endif
11825c8329edSJulian Elischer 		td->td_ksegrp	= NULL;
11835c8329edSJulian Elischer 		td->td_last_kse	= NULL;
1184696058c3SJulian Elischer 		PCPU_SET(deadthread, td);
11851faf202eSJulian Elischer 	} else {
11861faf202eSJulian Elischer 		PROC_UNLOCK(p);
11871faf202eSJulian Elischer 	}
11884093529dSJeff Roberson 	/* XXX Shouldn't cpu_throw() here. */
1189cc66ebe2SPeter Wemm 	mtx_assert(&sched_lock, MA_OWNED);
1190f2c49dd2SMarcel Moolenaar #if !defined(__alpha__) && !defined(__powerpc__)
1191cc66ebe2SPeter Wemm 	cpu_throw(td, choosethread());
1192cc66ebe2SPeter Wemm #else
119344990b8cSJulian Elischer 	cpu_throw();
1194cc66ebe2SPeter Wemm #endif
1195cc66ebe2SPeter Wemm 	panic("I'm a teapot!");
119644990b8cSJulian Elischer 	/* NOTREACHED */
119744990b8cSJulian Elischer }
119844990b8cSJulian Elischer 
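/*
 * A minimal sketch of the calling protocol for thread_exit(), derived
 * from the asserts above; the caller context shown here is assumed,
 * not taken from this file.
 */
#if 0
	PROC_LOCK(p);			/* asserted held on entry */
	mtx_lock_spin(&sched_lock);	/* asserted held on entry */
	thread_exit();			/* drops the proc lock; never returns */
#endif
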
119944990b8cSJulian Elischer /*
1200696058c3SJulian Elischer  * Do any thread-specific cleanup that may be needed in wait().
1201696058c3SJulian Elischer  * Called with Giant held; the proc lock and sched_lock are not held.
1202696058c3SJulian Elischer  */
1203696058c3SJulian Elischer void
1204696058c3SJulian Elischer thread_wait(struct proc *p)
1205696058c3SJulian Elischer {
1206696058c3SJulian Elischer 	struct thread *td;
1207696058c3SJulian Elischer 
1208696058c3SJulian Elischer 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
1209696058c3SJulian Elischer 	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1210696058c3SJulian Elischer 	FOREACH_THREAD_IN_PROC(p, td) {
1211696058c3SJulian Elischer 		if (td->td_standin != NULL) {
1212696058c3SJulian Elischer 			thread_free(td->td_standin);
1213696058c3SJulian Elischer 			td->td_standin = NULL;
1214696058c3SJulian Elischer 		}
1215696058c3SJulian Elischer 		cpu_thread_clean(td);
1216696058c3SJulian Elischer 	}
1217696058c3SJulian Elischer 	thread_reap();	/* check for zombie threads etc. */
1218696058c3SJulian Elischer }
1219696058c3SJulian Elischer 
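/*
 * A minimal sketch of the context thread_wait() expects, per the
 * comment and KASSERTs above; the caller shown is assumed.
 */
#if 0
	mtx_assert(&Giant, MA_OWNED);	/* Giant held, proc/sched locks not */
	thread_wait(p);			/* frees standins, reaps zombies */
#endif
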
1220696058c3SJulian Elischer /*
122144990b8cSJulian Elischer  * Link a thread to a process.
12221faf202eSJulian Elischer  * Set up anything that needs to be initialized for it to
12231faf202eSJulian Elischer  * be used by the process.
122444990b8cSJulian Elischer  *
122544990b8cSJulian Elischer  * Note that we do not link to the proc's ucred here.
122644990b8cSJulian Elischer  * The thread is linked as if running but no KSE assigned.
122744990b8cSJulian Elischer  */
122844990b8cSJulian Elischer void
122944990b8cSJulian Elischer thread_link(struct thread *td, struct ksegrp *kg)
123044990b8cSJulian Elischer {
123144990b8cSJulian Elischer 	struct proc *p;
123244990b8cSJulian Elischer 
123344990b8cSJulian Elischer 	p = kg->kg_proc;
123471fad9fdSJulian Elischer 	td->td_state    = TDS_INACTIVE;
123544990b8cSJulian Elischer 	td->td_proc     = p;
123644990b8cSJulian Elischer 	td->td_ksegrp   = kg;
123744990b8cSJulian Elischer 	td->td_last_kse = NULL;
12385215b187SJeff Roberson 	td->td_flags    = 0;
12395215b187SJeff Roberson 	td->td_kse      = NULL;
124044990b8cSJulian Elischer 
12411faf202eSJulian Elischer 	LIST_INIT(&td->td_contested);
12421faf202eSJulian Elischer 	callout_init(&td->td_slpcallout, 1);
124344990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
124444990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
124544990b8cSJulian Elischer 	p->p_numthreads++;
124644990b8cSJulian Elischer 	kg->kg_numthreads++;
124744990b8cSJulian Elischer }
124844990b8cSJulian Elischer 
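/*
 * A minimal sketch of creating and linking a fresh thread, using only
 * helpers visible in this file; a ksegrp pointer kg is assumed to be
 * in scope, and the zeroing step mirrors thread_alloc_spare().
 */
#if 0
	struct thread *newtd;

	newtd = thread_alloc();
	bzero(&newtd->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	thread_link(newtd, kg);		/* newtd: TDS_INACTIVE, no KSE yet */
#endif
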
1249d3a0bd78SJulian Elischer void
1250d3a0bd78SJulian Elischer thread_unlink(struct thread *td)
1251d3a0bd78SJulian Elischer {
1252d3a0bd78SJulian Elischer 	struct proc *p = td->td_proc;
1253d3a0bd78SJulian Elischer 	struct ksegrp *kg = td->td_ksegrp;
1254d3a0bd78SJulian Elischer 
1255112afcb2SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
1256d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1257d3a0bd78SJulian Elischer 	p->p_numthreads--;
1258d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1259d3a0bd78SJulian Elischer 	kg->kg_numthreads--;
1260d3a0bd78SJulian Elischer 	/* could clear a few other things here */
1261d3a0bd78SJulian Elischer }
1262d3a0bd78SJulian Elischer 
12635215b187SJeff Roberson /*
12645215b187SJeff Roberson  * Purge a ksegrp's resources. When a ksegrp is preparing to
12655215b187SJeff Roberson  * exit, it calls this function.
12665215b187SJeff Roberson  */
1267a6f37ac9SJohn Baldwin static void
12685215b187SJeff Roberson kse_purge_group(struct thread *td)
12695215b187SJeff Roberson {
12705215b187SJeff Roberson 	struct ksegrp *kg;
12715215b187SJeff Roberson 	struct kse *ke;
12725215b187SJeff Roberson 
12735215b187SJeff Roberson 	kg = td->td_ksegrp;
12745215b187SJeff Roberson  	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
12755215b187SJeff Roberson 	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
12765215b187SJeff Roberson 		KASSERT(ke->ke_state == KES_IDLE,
12775215b187SJeff Roberson 			("%s: wrong idle KSE state", __func__));
12785215b187SJeff Roberson 		kse_unlink(ke);
12795215b187SJeff Roberson 	}
12805215b187SJeff Roberson 	KASSERT((kg->kg_kses == 1),
12815215b187SJeff Roberson 		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
12825215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0),
12835215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall datas",
12845215b187SJeff Roberson 		__func__, kg->kg_numupcalls));
12855215b187SJeff Roberson }
12865215b187SJeff Roberson 
12875215b187SJeff Roberson /*
12885215b187SJeff Roberson  * Purge a process's KSE resource. When a process is preparing to
12895215b187SJeff Roberson  * exit, it calls kse_purge to release any extra KSE resources in
12905215b187SJeff Roberson  * the process.
12915215b187SJeff Roberson  */
1292a6f37ac9SJohn Baldwin static void
12935c8329edSJulian Elischer kse_purge(struct proc *p, struct thread *td)
12945c8329edSJulian Elischer {
12955c8329edSJulian Elischer 	struct ksegrp *kg;
12965215b187SJeff Roberson 	struct kse *ke;
12975c8329edSJulian Elischer 
12985c8329edSJulian Elischer  	KASSERT(p->p_numthreads == 1, ("bad thread number"));
12995c8329edSJulian Elischer 	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
13005c8329edSJulian Elischer 		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
13015c8329edSJulian Elischer 		p->p_numksegrps--;
13025215b187SJeff Roberson 		/*
13035215b187SJeff Roberson 		 * KSEs are not owned by anyone; after all threads
13045215b187SJeff Roberson 		 * in the group have exited, some KSEs may be left
13055215b187SJeff Roberson 		 * on the idle queue. Garbage collect them now.
13065215b187SJeff Roberson 		 */
13075215b187SJeff Roberson 		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
13085215b187SJeff Roberson 			KASSERT(ke->ke_state == KES_IDLE,
13095215b187SJeff Roberson 			   ("%s: wrong idle KSE state", __func__));
13105215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
13115215b187SJeff Roberson 			kg->kg_idle_kses--;
13125215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
13135215b187SJeff Roberson 			kg->kg_kses--;
13145215b187SJeff Roberson 			kse_stash(ke);
13155215b187SJeff Roberson 		}
13165c8329edSJulian Elischer 		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
13175c8329edSJulian Elischer 		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
13185215b187SJeff Roberson 		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
13195215b187SJeff Roberson 		KASSERT((kg->kg_numupcalls == 0),
13205215b187SJeff Roberson 		        ("%s: ksegrp still has %d upcall datas",
13215215b187SJeff Roberson 			__func__, kg->kg_numupcalls));
13225215b187SJeff Roberson 
13235215b187SJeff Roberson 		if (kg != td->td_ksegrp)
13245c8329edSJulian Elischer 			ksegrp_stash(kg);
13255c8329edSJulian Elischer 	}
13265c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
13275c8329edSJulian Elischer 	p->p_numksegrps++;
13285c8329edSJulian Elischer }
13295c8329edSJulian Elischer 
13305215b187SJeff Roberson /*
13315215b187SJeff Roberson  * This function is intended to be used to initialize a spare thread
13325215b187SJeff Roberson  * for an upcall. Initialize the thread's large data area here, outside
13335215b187SJeff Roberson  * sched_lock, since thread_schedule_upcall() runs with sched_lock held.
13345215b187SJeff Roberson  */
13355215b187SJeff Roberson void
13365215b187SJeff Roberson thread_alloc_spare(struct thread *td, struct thread *spare)
13375215b187SJeff Roberson {
13385215b187SJeff Roberson 	if (td->td_standin)
13395215b187SJeff Roberson 		return;
13405215b187SJeff Roberson 	if (spare == NULL)
13415215b187SJeff Roberson 		spare = thread_alloc();
13425215b187SJeff Roberson 	td->td_standin = spare;
13435215b187SJeff Roberson 	bzero(&spare->td_startzero,
13445215b187SJeff Roberson 	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
13455215b187SJeff Roberson 	spare->td_proc = td->td_proc;
13465215b187SJeff Roberson 	spare->td_ucred = crhold(td->td_ucred);
13475215b187SJeff Roberson }
13485c8329edSJulian Elischer 
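/*
 * The usual call pattern, as used later in thread_user_enter() and
 * thread_userret(): make sure a standin exists before it is needed.
 */
#if 0
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);	/* NULL: allocate a new spare */
#endif
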
134944990b8cSJulian Elischer /*
1350c76e33b6SJonathan Mini  * Create a thread and schedule it for upcall on the KSE given.
135193a7aa79SJulian Elischer  * Use our thread's standin so that we don't have to allocate one.
135244990b8cSJulian Elischer  */
135344990b8cSJulian Elischer struct thread *
13545215b187SJeff Roberson thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
135544990b8cSJulian Elischer {
135644990b8cSJulian Elischer 	struct thread *td2;
135744990b8cSJulian Elischer 
135844990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
135948bfcdddSJulian Elischer 
136048bfcdddSJulian Elischer 	/*
13615215b187SJeff Roberson 	 * Schedule an upcall thread on the specified kse_upcall;
13625215b187SJeff Roberson 	 * the kse_upcall must be free, and td must have a
13635215b187SJeff Roberson 	 * spare thread available.
136448bfcdddSJulian Elischer 	 */
13655215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
136648bfcdddSJulian Elischer 	if ((td2 = td->td_standin) != NULL) {
136748bfcdddSJulian Elischer 		td->td_standin = NULL;
136844990b8cSJulian Elischer 	} else {
13695215b187SJeff Roberson 		panic("no reserve thread when scheduling an upcall");
137048bfcdddSJulian Elischer 		return (NULL);
137144990b8cSJulian Elischer 	}
137244990b8cSJulian Elischer 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
137348bfcdddSJulian Elischer 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
13741faf202eSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
13751faf202eSJulian Elischer 	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
13765215b187SJeff Roberson 	thread_link(td2, ku->ku_ksegrp);
137736f7b36fSDavid Xu 	/* inherit blocked thread's context */
137811e0f8e1SMarcel Moolenaar 	cpu_set_upcall(td2, td);
13795215b187SJeff Roberson 	/* Let the new thread become owner of the upcall */
13805215b187SJeff Roberson 	ku->ku_owner   = td2;
13815215b187SJeff Roberson 	td2->td_upcall = ku;
13825215b187SJeff Roberson 	td2->td_flags  = TDF_UPCALLING;
13834093529dSJeff Roberson #if 0	/* XXX This shouldn't be necessary */
138402bbffafSDavid Xu 	if (td->td_proc->p_sflag & PS_NEEDSIGCHK)
138502bbffafSDavid Xu 		td2->td_flags |= TDF_ASTPENDING;
13864093529dSJeff Roberson #endif
13875215b187SJeff Roberson 	td2->td_kse    = NULL;
138848bfcdddSJulian Elischer 	td2->td_state  = TDS_CAN_RUN;
138948bfcdddSJulian Elischer 	td2->td_inhibitors = 0;
139044990b8cSJulian Elischer 	setrunqueue(td2);
139148bfcdddSJulian Elischer 	return (td2);	/* bogus.. should be a void function */
139244990b8cSJulian Elischer }
139344990b8cSJulian Elischer 
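/*
 * A minimal sketch of the ownership hand-off around
 * thread_schedule_upcall(), following the pattern thread_switchout()
 * uses below; sched_lock must be held throughout.
 */
#if 0
	struct kse_upcall *ku;
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);
	ku = td->td_upcall;
	ku->ku_owner = NULL;		/* the upcall must be free */
	td->td_upcall = NULL;
	td2 = thread_schedule_upcall(td, ku);	/* td2 now owns ku */
#endif
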
139458a3c273SJeff Roberson void
139558a3c273SJeff Roberson thread_signal_add(struct thread *td, int sig)
1396c76e33b6SJonathan Mini {
139758a3c273SJeff Roberson 	struct kse_upcall *ku;
139858a3c273SJeff Roberson 	struct proc *p;
1399c76e33b6SJonathan Mini 	sigset_t ss;
1400c76e33b6SJonathan Mini 	int error;
1401c76e33b6SJonathan Mini 
1402b0bd5f38SDavid Xu 	p = td->td_proc;
1403b0bd5f38SDavid Xu 	PROC_LOCK_ASSERT(p, MA_OWNED);
1404b0bd5f38SDavid Xu 	mtx_assert(&p->p_sigacts->ps_mtx, MA_OWNED);
140558a3c273SJeff Roberson 	td = curthread;
140658a3c273SJeff Roberson 	ku = td->td_upcall;
1407b0bd5f38SDavid Xu 	mtx_unlock(&p->p_sigacts->ps_mtx);
1408c76e33b6SJonathan Mini 	PROC_UNLOCK(p);
140958a3c273SJeff Roberson 	error = copyin(&ku->ku_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
1410c76e33b6SJonathan Mini 	if (error)
141158a3c273SJeff Roberson 		goto error;
141258a3c273SJeff Roberson 
1413c76e33b6SJonathan Mini 	SIGADDSET(ss, sig);
141458a3c273SJeff Roberson 
141558a3c273SJeff Roberson 	error = copyout(&ss, &ku->ku_mailbox->km_sigscaught, sizeof(sigset_t));
1416c76e33b6SJonathan Mini 	if (error)
141758a3c273SJeff Roberson 		goto error;
141858a3c273SJeff Roberson 
141958a3c273SJeff Roberson 	PROC_LOCK(p);
1420b0bd5f38SDavid Xu 	mtx_lock(&p->p_sigacts->ps_mtx);
142158a3c273SJeff Roberson 	return;
142258a3c273SJeff Roberson error:
142358a3c273SJeff Roberson 	PROC_LOCK(p);
142458a3c273SJeff Roberson 	sigexit(td, SIGILL);
142558a3c273SJeff Roberson }
142658a3c273SJeff Roberson 
142758a3c273SJeff Roberson 
142858a3c273SJeff Roberson /*
142958a3c273SJeff Roberson  * Schedule an upcall to notify a KSE process that it has received signals.
143158a3c273SJeff Roberson  */
143258a3c273SJeff Roberson void
143358a3c273SJeff Roberson thread_signal_upcall(struct thread *td)
143458a3c273SJeff Roberson {
1435c76e33b6SJonathan Mini 	mtx_lock_spin(&sched_lock);
143658a3c273SJeff Roberson 	td->td_flags |= TDF_UPCALLING;
1437c76e33b6SJonathan Mini 	mtx_unlock_spin(&sched_lock);
143858a3c273SJeff Roberson 
143958a3c273SJeff Roberson 	return;
1440c76e33b6SJonathan Mini }
1441c76e33b6SJonathan Mini 
14426ce75196SDavid Xu void
14436ce75196SDavid Xu thread_switchout(struct thread *td)
14446ce75196SDavid Xu {
14456ce75196SDavid Xu 	struct kse_upcall *ku;
14466ce75196SDavid Xu 
14476ce75196SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
14486ce75196SDavid Xu 
14496ce75196SDavid Xu 	/*
14506ce75196SDavid Xu 	 * If the outgoing thread is in a threaded group and has never
14516ce75196SDavid Xu 	 * scheduled an upcall, decide whether this is a short
14526ce75196SDavid Xu 	 * or long term event and thus whether or not to schedule
14536ce75196SDavid Xu 	 * an upcall.
14546ce75196SDavid Xu 	 * If it is a short term event, just suspend it in
14556ce75196SDavid Xu 	 * a way that takes its KSE with it.
14566ce75196SDavid Xu 	 * Select the events for which we want to schedule upcalls.
14576ce75196SDavid Xu 	 * For now it's just sleep.
14586ce75196SDavid Xu 	 * XXXKSE eventually almost any inhibition could do.
14596ce75196SDavid Xu 	 */
14606ce75196SDavid Xu 	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
14616ce75196SDavid Xu 		/*
14626ce75196SDavid Xu 		 * Release ownership of the upcall and schedule an
14636ce75196SDavid Xu 		 * upcall thread; the new upcall thread becomes the
14646ce75196SDavid Xu 		 * owner of the upcall structure.
14656ce75196SDavid Xu 		 */
14666ce75196SDavid Xu 		ku = td->td_upcall;
14676ce75196SDavid Xu 		ku->ku_owner = NULL;
14686ce75196SDavid Xu 		td->td_upcall = NULL;
14696ce75196SDavid Xu 		td->td_flags &= ~TDF_CAN_UNBIND;
14706ce75196SDavid Xu 		thread_schedule_upcall(td, ku);
14716ce75196SDavid Xu 	}
14726ce75196SDavid Xu }
14736ce75196SDavid Xu 
1474c76e33b6SJonathan Mini /*
14755215b187SJeff Roberson  * Setup done on the thread when it enters the kernel.
14761434d3feSJulian Elischer  * XXXKSE Presently only for syscalls but eventually all kernel entries.
14771434d3feSJulian Elischer  */
14781434d3feSJulian Elischer void
14791434d3feSJulian Elischer thread_user_enter(struct proc *p, struct thread *td)
14801434d3feSJulian Elischer {
14815215b187SJeff Roberson 	struct ksegrp *kg;
14825215b187SJeff Roberson 	struct kse_upcall *ku;
14831ecb38a3SDavid Xu 	struct kse_thr_mailbox *tmbx;
14841434d3feSJulian Elischer 
14855215b187SJeff Roberson 	kg = td->td_ksegrp;
14861ecb38a3SDavid Xu 
14871434d3feSJulian Elischer 	/*
14881434d3feSJulian Elischer 	 * First check that we shouldn't just abort.
14891434d3feSJulian Elischer 	 * But check if we are the single thread first!
14901434d3feSJulian Elischer 	 */
14911434d3feSJulian Elischer 	PROC_LOCK(p);
1492889a6b58SJohn Baldwin 	if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
14931434d3feSJulian Elischer 		mtx_lock_spin(&sched_lock);
1494e574e444SDavid Xu 		thread_stopped(p);
14951434d3feSJulian Elischer 		thread_exit();
14961434d3feSJulian Elischer 		/* NOTREACHED */
14971434d3feSJulian Elischer 	}
1498889a6b58SJohn Baldwin 	PROC_UNLOCK(p);
14991434d3feSJulian Elischer 
15001434d3feSJulian Elischer 	/*
15011434d3feSJulian Elischer 	 * If we are doing a syscall in a KSE environment,
15021434d3feSJulian Elischer 	 * note where our mailbox is. There is always the
150393a7aa79SJulian Elischer 	 * possibility that we could do this lazily (in kse_reassign()),
15041434d3feSJulian Elischer 	 * but for now do it every time.
15051434d3feSJulian Elischer 	 */
15065215b187SJeff Roberson 	kg = td->td_ksegrp;
15075215b187SJeff Roberson 	if (kg->kg_numupcalls) {
15085215b187SJeff Roberson 		ku = td->td_upcall;
15095215b187SJeff Roberson 		KASSERT(ku, ("%s: no upcall owned", __func__));
15105215b187SJeff Roberson 		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
15111ecb38a3SDavid Xu 		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
15121ecb38a3SDavid Xu 		ku->ku_mflags = fuword((void *)&ku->ku_mailbox->km_flags);
15131ecb38a3SDavid Xu 		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
15141ecb38a3SDavid Xu 		if ((tmbx == NULL) || (tmbx == (void *)-1)) {
15155215b187SJeff Roberson 			td->td_mailbox = NULL;
15168798d4f9SDavid Xu 		} else {
15171ecb38a3SDavid Xu 			td->td_mailbox = tmbx;
15181434d3feSJulian Elischer 			if (td->td_standin == NULL)
15195215b187SJeff Roberson 				thread_alloc_spare(td, NULL);
15208798d4f9SDavid Xu 			mtx_lock_spin(&sched_lock);
15211ecb38a3SDavid Xu 			if (ku->ku_mflags & KMF_NOUPCALL)
15221ecb38a3SDavid Xu 				td->td_flags &= ~TDF_CAN_UNBIND;
15231ecb38a3SDavid Xu 			else
152493a7aa79SJulian Elischer 				td->td_flags |= TDF_CAN_UNBIND;
15258798d4f9SDavid Xu 			mtx_unlock_spin(&sched_lock);
15265215b187SJeff Roberson 		}
15271434d3feSJulian Elischer 	}
15281434d3feSJulian Elischer }
15291434d3feSJulian Elischer 
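/*
 * A simplified sketch of how thread_user_enter() and thread_userret()
 * bracket a syscall for a threaded process; the dispatch step in the
 * middle is assumed and elided.
 */
#if 0
	thread_user_enter(p, td);	/* latch km_curthread into td_mailbox */
	/* ... the syscall itself runs here ... */
	error = thread_userret(td, frame);	/* export context, maybe upcall */
#endif
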
15301434d3feSJulian Elischer /*
1531c76e33b6SJonathan Mini  * The extra work we go through if we are a threaded process when we
1532c76e33b6SJonathan Mini  * return to userland.
1533c76e33b6SJonathan Mini  *
1534c76e33b6SJonathan Mini  * If we are a KSE process and returning to user mode, check for
1535c76e33b6SJonathan Mini  * extra work to do before we return (e.g. for more syscalls
1536c76e33b6SJonathan Mini  * to complete first).  If we were in a critical section, we should
1537c76e33b6SJonathan Mini  * just return to let it finish. Same if we were in the UTS (in
1538c76e33b6SJonathan Mini  * which case the mailbox's context's busy indicator will be set).
1539c76e33b6SJonathan Mini  * The only traps we support will have set the mailbox.
1540c76e33b6SJonathan Mini  * We will clear it here.
154144990b8cSJulian Elischer  */
1542c76e33b6SJonathan Mini int
1543253fdd5bSJulian Elischer thread_userret(struct thread *td, struct trapframe *frame)
1544c76e33b6SJonathan Mini {
15451ecb38a3SDavid Xu 	int error = 0, upcalls, uts_crit;
15465215b187SJeff Roberson 	struct kse_upcall *ku;
15470252d203SDavid Xu 	struct ksegrp *kg, *kg2;
154848bfcdddSJulian Elischer 	struct proc *p;
1549bfd83250SDavid Xu 	struct timespec ts;
1550c76e33b6SJonathan Mini 
15516f8132a8SJulian Elischer 	p = td->td_proc;
15525215b187SJeff Roberson 	kg = td->td_ksegrp;
155393a7aa79SJulian Elischer 
15545215b187SJeff Roberson 	/* Nothing to do with non-threaded group/process */
15555215b187SJeff Roberson 	if (td->td_ksegrp->kg_numupcalls == 0)
15565215b187SJeff Roberson 		return (0);
15575215b187SJeff Roberson 
15585215b187SJeff Roberson 	/*
15595215b187SJeff Roberson 	 * A stat clock interrupt hit in userland and we are
15605215b187SJeff Roberson 	 * returning from that interrupt; charge the thread's
15615215b187SJeff Roberson 	 * userland time to the UTS.
15625215b187SJeff Roberson 	 */
15635215b187SJeff Roberson 	if (td->td_flags & TDF_USTATCLOCK) {
15644b4866edSDavid Xu 		thread_update_usr_ticks(td, 1);
156593a7aa79SJulian Elischer 		mtx_lock_spin(&sched_lock);
15665215b187SJeff Roberson 		td->td_flags &= ~TDF_USTATCLOCK;
15670dbb100bSDavid Xu 		mtx_unlock_spin(&sched_lock);
15684b4866edSDavid Xu 		if (kg->kg_completed ||
15694b4866edSDavid Xu 		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
15704b4866edSDavid Xu 			thread_user_enter(p, td);
15715215b187SJeff Roberson 	}
15725215b187SJeff Roberson 
15731ecb38a3SDavid Xu 	uts_crit = (td->td_mailbox == NULL);
15741ecb38a3SDavid Xu 	ku = td->td_upcall;
15755215b187SJeff Roberson 	/*
15765215b187SJeff Roberson 	 * Optimisation:
15775215b187SJeff Roberson 	 * This thread has not started any upcall.
15785215b187SJeff Roberson 	 * If there is no work to report other than ourselves,
15795215b187SJeff Roberson 	 * then it can return directly to userland.
15805215b187SJeff Roberson 	 */
15815215b187SJeff Roberson 	if (TD_CAN_UNBIND(td)) {
15825215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
15835215b187SJeff Roberson 		td->td_flags &= ~TDF_CAN_UNBIND;
15844093529dSJeff Roberson 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
158521e0492aSDavid Xu 		    (kg->kg_completed == NULL) &&
15866ce75196SDavid Xu 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
158795bee4c3SDavid Xu 		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
15884093529dSJeff Roberson 			mtx_unlock_spin(&sched_lock);
15894b4866edSDavid Xu 			thread_update_usr_ticks(td, 0);
15909a4b78c9SDavid Xu 			nanotime(&ts);
15919a4b78c9SDavid Xu 			error = copyout(&ts,
15929a4b78c9SDavid Xu 				(caddr_t)&ku->ku_mailbox->km_timeofday,
15939a4b78c9SDavid Xu 				sizeof(ts));
159421e0492aSDavid Xu 			td->td_mailbox = 0;
15951ecb38a3SDavid Xu 			ku->ku_mflags = 0;
15969a4b78c9SDavid Xu 			if (error)
15979a4b78c9SDavid Xu 				goto out;
159893a7aa79SJulian Elischer 			return (0);
159993a7aa79SJulian Elischer 		}
16004093529dSJeff Roberson 		mtx_unlock_spin(&sched_lock);
160193a7aa79SJulian Elischer 		error = thread_export_context(td);
160248bfcdddSJulian Elischer 		if (error) {
160348bfcdddSJulian Elischer 			/*
16045215b187SJeff Roberson 			 * Failing to do the KSE operation just defaults
160548bfcdddSJulian Elischer 			 * back to synchronous operation, so just return from
160693a7aa79SJulian Elischer 			 * the syscall.
160793a7aa79SJulian Elischer 			 */
16081ecb38a3SDavid Xu 			goto out;
160993a7aa79SJulian Elischer 		}
161093a7aa79SJulian Elischer 		/*
16115215b187SJeff Roberson 		 * There is something to report, and we own an upcall
16125215b187SJeff Roberson 		 * structure, so we can go to userland.
16135215b187SJeff Roberson 		 * Turn ourselves into an upcall thread.
161493a7aa79SJulian Elischer 		 */
16155215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
161693a7aa79SJulian Elischer 		td->td_flags |= TDF_UPCALLING;
161793a7aa79SJulian Elischer 		mtx_unlock_spin(&sched_lock);
16181ecb38a3SDavid Xu 	} else if (td->td_mailbox && (ku == NULL)) {
161993a7aa79SJulian Elischer 		error = thread_export_context(td);
162093a7aa79SJulian Elischer 		/* possibly upcall with error? */
1621e574e444SDavid Xu 		PROC_LOCK(p);
16226f8132a8SJulian Elischer 		/*
16235215b187SJeff Roberson 		 * There are upcall threads waiting for
16245215b187SJeff Roberson 		 * work to do, wake one of them up.
16255215b187SJeff Roberson 		 * XXXKSE Maybe wake all of them up.
16266f8132a8SJulian Elischer 		 */
1627e574e444SDavid Xu 		if (!error && kg->kg_upsleeps)
16285215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
1629e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1630e574e444SDavid Xu 		thread_stopped(p);
163193a7aa79SJulian Elischer 		thread_exit();
16325215b187SJeff Roberson 		/* NOTREACHED */
163348bfcdddSJulian Elischer 	}
163493a7aa79SJulian Elischer 
1635a87891eeSDavid Xu 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1636a87891eeSDavid Xu 
1637a87891eeSDavid Xu 	if (p->p_numthreads > max_threads_per_proc) {
1638a87891eeSDavid Xu 		max_threads_hits++;
1639a87891eeSDavid Xu 		PROC_LOCK(p);
1640112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
16417677ce18SDavid Xu 		p->p_maxthrwaits++;
1642a87891eeSDavid Xu 		while (p->p_numthreads > max_threads_per_proc) {
1643a87891eeSDavid Xu 			upcalls = 0;
1644a87891eeSDavid Xu 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1645a87891eeSDavid Xu 				if (kg2->kg_numupcalls == 0)
1646a87891eeSDavid Xu 					upcalls++;
1647a87891eeSDavid Xu 				else
1648a87891eeSDavid Xu 					upcalls += kg2->kg_numupcalls;
1649a87891eeSDavid Xu 			}
1650a87891eeSDavid Xu 			if (upcalls >= max_threads_per_proc)
1651a87891eeSDavid Xu 				break;
16525073e68fSDavid Xu 			mtx_unlock_spin(&sched_lock);
165336407becSDavid Xu 			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
16547677ce18SDavid Xu 			    "maxthreads", NULL)) {
16557677ce18SDavid Xu 				mtx_lock_spin(&sched_lock);
165636407becSDavid Xu 				break;
16577677ce18SDavid Xu 			} else {
1658112afcb2SJohn Baldwin 				mtx_lock_spin(&sched_lock);
1659a87891eeSDavid Xu 			}
16607677ce18SDavid Xu 		}
16617677ce18SDavid Xu 		p->p_maxthrwaits--;
1662112afcb2SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
1663a87891eeSDavid Xu 		PROC_UNLOCK(p);
1664a87891eeSDavid Xu 	}
1665a87891eeSDavid Xu 
166693a7aa79SJulian Elischer 	if (td->td_flags & TDF_UPCALLING) {
16671ecb38a3SDavid Xu 		uts_crit = 0;
16686ce75196SDavid Xu 		kg->kg_nextupcall = ticks+kg->kg_upquantum;
166948bfcdddSJulian Elischer 		/*
167044990b8cSJulian Elischer 		 * There is no more work to do and we are going to ride
16715215b187SJeff Roberson 		 * this thread up to userland as an upcall.
167248bfcdddSJulian Elischer 		 * Do the last parts of the setup needed for the upcall.
167344990b8cSJulian Elischer 		 */
1674c76e33b6SJonathan Mini 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1675ed32df81SJulian Elischer 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1676c76e33b6SJonathan Mini 
16775215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
16785215b187SJeff Roberson 		td->td_flags &= ~TDF_UPCALLING;
16795215b187SJeff Roberson 		if (ku->ku_flags & KUF_DOUPCALL)
16805215b187SJeff Roberson 			ku->ku_flags &= ~KUF_DOUPCALL;
16815215b187SJeff Roberson 		mtx_unlock_spin(&sched_lock);
16823d0586d4SJulian Elischer 
1683c76e33b6SJonathan Mini 		/*
16841ecb38a3SDavid Xu 		 * Set user context to the UTS
16851ecb38a3SDavid Xu 		 */
16861ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
16871ecb38a3SDavid Xu 			cpu_set_upcall_kse(td, ku);
16881ecb38a3SDavid Xu 			error = suword(&ku->ku_mailbox->km_curthread, 0);
16891ecb38a3SDavid Xu 			if (error)
16901ecb38a3SDavid Xu 				goto out;
16911ecb38a3SDavid Xu 		}
16921ecb38a3SDavid Xu 
16931ecb38a3SDavid Xu 		/*
169493a7aa79SJulian Elischer 		 * Unhook the list of completed threads.
169593a7aa79SJulian Elischer 		 * Anything that completes after this gets to
169693a7aa79SJulian Elischer 		 * come in next time.
169793a7aa79SJulian Elischer 		 * Put the list of completed thread mailboxes on
169893a7aa79SJulian Elischer 		 * this KSE's mailbox.
1699c76e33b6SJonathan Mini 		 */
17001ecb38a3SDavid Xu 		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
17011ecb38a3SDavid Xu 		    (error = thread_link_mboxes(kg, ku)) != 0)
17020252d203SDavid Xu 			goto out;
17031ecb38a3SDavid Xu 	}
17041ecb38a3SDavid Xu 	if (!uts_crit) {
1705bfd83250SDavid Xu 		nanotime(&ts);
17061ecb38a3SDavid Xu 		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1707bfd83250SDavid Xu 	}
17080252d203SDavid Xu 
17090252d203SDavid Xu out:
17100252d203SDavid Xu 	if (error) {
17113d0586d4SJulian Elischer 		/*
1712fc8cdd87SDavid Xu 		 * Things are going to be so screwed we should just kill
1713fc8cdd87SDavid Xu 		 * the process.
17143d0586d4SJulian Elischer 		 * How do we do that?
17153d0586d4SJulian Elischer 		 */
171648bfcdddSJulian Elischer 		PROC_LOCK(td->td_proc);
171748bfcdddSJulian Elischer 		psignal(td->td_proc, SIGSEGV);
171848bfcdddSJulian Elischer 		PROC_UNLOCK(td->td_proc);
17190252d203SDavid Xu 	} else {
17200252d203SDavid Xu 		/*
17210252d203SDavid Xu 		 * Optimisation:
17220252d203SDavid Xu 		 * Ensure that we have a spare thread available,
17230252d203SDavid Xu 		 * for when we re-enter the kernel.
17240252d203SDavid Xu 		 */
17250252d203SDavid Xu 		if (td->td_standin == NULL)
17260252d203SDavid Xu 			thread_alloc_spare(td, NULL);
17270252d203SDavid Xu 	}
17280252d203SDavid Xu 
17291ecb38a3SDavid Xu 	ku->ku_mflags = 0;
17300252d203SDavid Xu 	/*
17310252d203SDavid Xu 	 * Clear thread mailbox first, then clear system tick count.
17320252d203SDavid Xu 	 * The order is important because thread_statclock() uses
17330252d203SDavid Xu 	 * the mailbox pointer to see whether it is a userland
17340252d203SDavid Xu 	 * thread or a UTS kernel thread.
17350252d203SDavid Xu 	 */
173693a7aa79SJulian Elischer 	td->td_mailbox = NULL;
17375215b187SJeff Roberson 	td->td_usticks = 0;
173848bfcdddSJulian Elischer 	return (error);	/* go sync */
173944990b8cSJulian Elischer }
174044990b8cSJulian Elischer 
174144990b8cSJulian Elischer /*
174244990b8cSJulian Elischer  * Enforce single-threading.
174344990b8cSJulian Elischer  *
174444990b8cSJulian Elischer  * Returns 1 if the caller must abort (another thread is waiting to
174544990b8cSJulian Elischer  * exit the process or similar). Process is locked!
174644990b8cSJulian Elischer  * Returns 0 when you are successfully the only thread running.
174744990b8cSJulian Elischer  * A process has successfully single-threaded in suspend mode when
174844990b8cSJulian Elischer  * there are no threads in user mode. Threads in the kernel must be
174944990b8cSJulian Elischer  * allowed to continue until they get to the user boundary. They may even
175044990b8cSJulian Elischer  * copy out their return values and data before suspending. They may however be
175144990b8cSJulian Elischer  * accelerated in reaching the user boundary as we will wake up
175244990b8cSJulian Elischer  * any sleeping threads that are interruptible (PCATCH).
175344990b8cSJulian Elischer  */
175444990b8cSJulian Elischer int
175544990b8cSJulian Elischer thread_single(int force_exit)
175644990b8cSJulian Elischer {
175744990b8cSJulian Elischer 	struct thread *td;
175844990b8cSJulian Elischer 	struct thread *td2;
175944990b8cSJulian Elischer 	struct proc *p;
176044990b8cSJulian Elischer 
176144990b8cSJulian Elischer 	td = curthread;
176244990b8cSJulian Elischer 	p = td->td_proc;
1763696058c3SJulian Elischer 	mtx_assert(&Giant, MA_OWNED);
176444990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
176544990b8cSJulian Elischer 	KASSERT((td != NULL), ("curthread is NULL"));
176644990b8cSJulian Elischer 
17672c10d16aSJeff Roberson 	if ((p->p_flag & P_THREADED) == 0 && p->p_numthreads == 1)
176844990b8cSJulian Elischer 		return (0);
176944990b8cSJulian Elischer 
1770e3b9bf71SJulian Elischer 	/* Is someone already single threading? */
1771e3b9bf71SJulian Elischer 	if (p->p_singlethread)
177244990b8cSJulian Elischer 		return (1);
177344990b8cSJulian Elischer 
177493a7aa79SJulian Elischer 	if (force_exit == SINGLE_EXIT) {
177544990b8cSJulian Elischer 		p->p_flag |= P_SINGLE_EXIT;
177693a7aa79SJulian Elischer 	} else
177744990b8cSJulian Elischer 		p->p_flag &= ~P_SINGLE_EXIT;
17781279572aSDavid Xu 	p->p_flag |= P_STOPPED_SINGLE;
177971fad9fdSJulian Elischer 	mtx_lock_spin(&sched_lock);
1780112afcb2SJohn Baldwin 	p->p_singlethread = td;
1781112afcb2SJohn Baldwin 	while ((p->p_numthreads - p->p_suspcount) != 1) {
178244990b8cSJulian Elischer 		FOREACH_THREAD_IN_PROC(p, td2) {
178344990b8cSJulian Elischer 			if (td2 == td)
178444990b8cSJulian Elischer 				continue;
1785588257e8SDavid Xu 			td2->td_flags |= TDF_ASTPENDING;
178671fad9fdSJulian Elischer 			if (TD_IS_INHIBITED(td2)) {
17871279572aSDavid Xu 				if (force_exit == SINGLE_EXIT) {
17889d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2)) {
178971fad9fdSJulian Elischer 						thread_unsuspend_one(td2);
179071fad9fdSJulian Elischer 					}
179133862f40SDavid Xu 					if (TD_ON_SLEEPQ(td2) &&
179233862f40SDavid Xu 					    (td2->td_flags & TDF_SINTR)) {
1793e3b9bf71SJulian Elischer 						if (td2->td_flags & TDF_CVWAITQ)
179433862f40SDavid Xu 							cv_abort(td2);
1795e3b9bf71SJulian Elischer 						else
179633862f40SDavid Xu 							abortsleep(td2);
179771fad9fdSJulian Elischer 					}
17989d102777SJulian Elischer 				} else {
17999d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2))
18009d102777SJulian Elischer 						continue;
18015215b187SJeff Roberson 					/*
18025215b187SJeff Roberson 					 * Maybe other inhibited states too?
18035215b187SJeff Roberson 					 * XXXKSE Is it totally safe to
18045215b187SJeff Roberson 					 * suspend a non-interruptible thread?
18055215b187SJeff Roberson 					 */
180693a7aa79SJulian Elischer 					if (td2->td_inhibitors &
18075215b187SJeff Roberson 					    (TDI_SLEEPING | TDI_SWAPPED))
18089d102777SJulian Elischer 						thread_suspend_one(td2);
180944990b8cSJulian Elischer 				}
181044990b8cSJulian Elischer 			}
18119d102777SJulian Elischer 		}
18129d102777SJulian Elischer 		/*
18139d102777SJulian Elischer 		 * Maybe we suspended some threads; was that enough?
18149d102777SJulian Elischer 		 */
1815112afcb2SJohn Baldwin 		if ((p->p_numthreads - p->p_suspcount) == 1)
18169d102777SJulian Elischer 			break;
18179d102777SJulian Elischer 
181844990b8cSJulian Elischer 		/*
181944990b8cSJulian Elischer 		 * Wake us up when everyone else has suspended.
1820e3b9bf71SJulian Elischer 		 * In the meantime we suspend as well.
182144990b8cSJulian Elischer 		 */
182271fad9fdSJulian Elischer 		thread_suspend_one(td);
182311b20c68SDavid Xu 		DROP_GIANT();
182444990b8cSJulian Elischer 		PROC_UNLOCK(p);
1825696058c3SJulian Elischer 		p->p_stats->p_ru.ru_nvcsw++;
182644990b8cSJulian Elischer 		mi_switch();
182744990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
182811b20c68SDavid Xu 		PICKUP_GIANT();
182944990b8cSJulian Elischer 		PROC_LOCK(p);
1830112afcb2SJohn Baldwin 		mtx_lock_spin(&sched_lock);
183144990b8cSJulian Elischer 	}
18325215b187SJeff Roberson 	if (force_exit == SINGLE_EXIT) {
1833112afcb2SJohn Baldwin 		if (td->td_upcall)
18345215b187SJeff Roberson 			upcall_remove(td);
18355c8329edSJulian Elischer 		kse_purge(p, td);
18365215b187SJeff Roberson 	}
1837112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
183844990b8cSJulian Elischer 	return (0);
183944990b8cSJulian Elischer }
184044990b8cSJulian Elischer 
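/*
 * A minimal sketch of forcing single-threading before exit; the
 * caller (an exit1()-style path) is assumed.
 */
#if 0
	mtx_assert(&Giant, MA_OWNED);
	PROC_LOCK(p);
	if (thread_single(SINGLE_EXIT))
		return;			/* another thread won the race; back out */
	/* now the only running thread in the process */
	PROC_UNLOCK(p);
#endif
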
184144990b8cSJulian Elischer /*
184244990b8cSJulian Elischer  * Called in from locations that can safely check to see
184344990b8cSJulian Elischer  * whether we have to suspend or at least throttle for a
184444990b8cSJulian Elischer  * single-thread event (e.g. fork).
184544990b8cSJulian Elischer  *
184644990b8cSJulian Elischer  * Such locations include userret().
184744990b8cSJulian Elischer  * If the "return_instead" argument is nonzero, the thread must be able to
184844990b8cSJulian Elischer  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
184944990b8cSJulian Elischer  *
185044990b8cSJulian Elischer  * The 'return_instead' argument tells the function if it may do a
185144990b8cSJulian Elischer  * thread_exit() or suspend, or whether the caller must abort and back
185244990b8cSJulian Elischer  * out instead.
185344990b8cSJulian Elischer  *
185444990b8cSJulian Elischer  * If the thread that set the single_threading request has set the
185544990b8cSJulian Elischer  * P_SINGLE_EXIT bit in the process flags then this call will never return
185644990b8cSJulian Elischer  * if 'return_instead' is false, but will exit.
185744990b8cSJulian Elischer  *
185844990b8cSJulian Elischer  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
185944990b8cSJulian Elischer  *---------------+--------------------+---------------------
186044990b8cSJulian Elischer  *       0       | returns 0          |   returns 0 or 1
186144990b8cSJulian Elischer  *               | when ST ends       |   immediately
186244990b8cSJulian Elischer  *---------------+--------------------+---------------------
186344990b8cSJulian Elischer  *       1       | thread exits       |   returns 1
186444990b8cSJulian Elischer  *               |                    |  immediately
186544990b8cSJulian Elischer  * 0 = thread_exit() or suspension ok,
186644990b8cSJulian Elischer  * other = return error instead of stopping the thread.
186744990b8cSJulian Elischer  *
186844990b8cSJulian Elischer  * While a full suspension is under effect, even a single threading
186944990b8cSJulian Elischer  * thread would be suspended if it made this call (but it shouldn't).
187044990b8cSJulian Elischer  * This call should only be made from places where
187144990b8cSJulian Elischer  * thread_exit() would be safe as that may be the outcome unless
187244990b8cSJulian Elischer  * return_instead is set.
187344990b8cSJulian Elischer  */
187444990b8cSJulian Elischer int
187544990b8cSJulian Elischer thread_suspend_check(int return_instead)
187644990b8cSJulian Elischer {
1877ecafb24bSJuli Mallett 	struct thread *td;
1878ecafb24bSJuli Mallett 	struct proc *p;
187944990b8cSJulian Elischer 
188044990b8cSJulian Elischer 	td = curthread;
188144990b8cSJulian Elischer 	p = td->td_proc;
188244990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
188344990b8cSJulian Elischer 	while (P_SHOULDSTOP(p)) {
18841279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
188544990b8cSJulian Elischer 			KASSERT(p->p_singlethread != NULL,
188644990b8cSJulian Elischer 			    ("singlethread not set"));
188744990b8cSJulian Elischer 			/*
1888e3b9bf71SJulian Elischer 			 * The only suspension in action is a
1889e3b9bf71SJulian Elischer 			 * single-threading. The single threader need not stop.
1890b6d5995eSJulian Elischer 			 * XXX Should be safe to access unlocked
1891b6d5995eSJulian Elischer 			 * as it can only be set to be true by us.
189244990b8cSJulian Elischer 			 */
1893e3b9bf71SJulian Elischer 			if (p->p_singlethread == td)
189444990b8cSJulian Elischer 				return (0);	/* Exempt from stopping. */
189544990b8cSJulian Elischer 		}
1896e3b9bf71SJulian Elischer 		if (return_instead)
189744990b8cSJulian Elischer 			return (1);
189844990b8cSJulian Elischer 
1899e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1900e574e444SDavid Xu 		thread_stopped(p);
190144990b8cSJulian Elischer 		/*
190244990b8cSJulian Elischer 		 * If the process is waiting for us to exit,
190344990b8cSJulian Elischer 		 * this thread should just suicide.
19041279572aSDavid Xu 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
190544990b8cSJulian Elischer 		 */
190644990b8cSJulian Elischer 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
190744990b8cSJulian Elischer 			while (mtx_owned(&Giant))
190844990b8cSJulian Elischer 				mtx_unlock(&Giant);
19092c10d16aSJeff Roberson 			if (p->p_flag & P_THREADED)
191044990b8cSJulian Elischer 				thread_exit();
19112c10d16aSJeff Roberson 			else
19122c10d16aSJeff Roberson 				thr_exit1();
191344990b8cSJulian Elischer 		}
191444990b8cSJulian Elischer 
191544990b8cSJulian Elischer 		/*
191644990b8cSJulian Elischer 		 * When a thread suspends, it just
191744990b8cSJulian Elischer 		 * moves to the process's suspend queue
191844990b8cSJulian Elischer 		 * and stays there.
191944990b8cSJulian Elischer 		 */
192071fad9fdSJulian Elischer 		thread_suspend_one(td);
19211279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1922cf19bf91SJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
192371fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
1924cf19bf91SJulian Elischer 			}
1925cf19bf91SJulian Elischer 		}
19261ecc6456SDavid Xu 		DROP_GIANT();
1927a6f37ac9SJohn Baldwin 		PROC_UNLOCK(p);
192820568366SJulian Elischer 		p->p_stats->p_ru.ru_nivcsw++;
192944990b8cSJulian Elischer 		mi_switch();
193044990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
19311ecc6456SDavid Xu 		PICKUP_GIANT();
193244990b8cSJulian Elischer 		PROC_LOCK(p);
193344990b8cSJulian Elischer 	}
193444990b8cSJulian Elischer 	return (0);
193544990b8cSJulian Elischer }
193644990b8cSJulian Elischer 
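/*
 * A minimal sketch of a caller that cannot afford to stop here and so
 * passes return_instead != 0; the error value chosen is assumed.
 */
#if 0
	PROC_LOCK(p);
	if (thread_suspend_check(1)) {
		PROC_UNLOCK(p);
		return (EINTR);		/* must abort and unwind */
	}
	PROC_UNLOCK(p);
#endif
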
193735c32a76SDavid Xu void
193835c32a76SDavid Xu thread_suspend_one(struct thread *td)
193935c32a76SDavid Xu {
194035c32a76SDavid Xu 	struct proc *p = td->td_proc;
194135c32a76SDavid Xu 
194235c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
1943112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
1944e574e444SDavid Xu 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
194535c32a76SDavid Xu 	p->p_suspcount++;
194671fad9fdSJulian Elischer 	TD_SET_SUSPENDED(td);
194735c32a76SDavid Xu 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
194871fad9fdSJulian Elischer 	/*
194971fad9fdSJulian Elischer 	 * Hack: If we are suspending but are on the sleep queue
195071fad9fdSJulian Elischer 	 * then we are in msleep or the cv equivalent. We
195171fad9fdSJulian Elischer 	 * want to look like we have two inhibitors.
19529d102777SJulian Elischer 	 * It may already be set; that doesn't matter.
195371fad9fdSJulian Elischer 	 */
195471fad9fdSJulian Elischer 	if (TD_ON_SLEEPQ(td))
195571fad9fdSJulian Elischer 		TD_SET_SLEEPING(td);
195635c32a76SDavid Xu }
195735c32a76SDavid Xu 
195835c32a76SDavid Xu void
195935c32a76SDavid Xu thread_unsuspend_one(struct thread *td)
196035c32a76SDavid Xu {
196135c32a76SDavid Xu 	struct proc *p = td->td_proc;
196235c32a76SDavid Xu 
196335c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
1964112afcb2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
196535c32a76SDavid Xu 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
196671fad9fdSJulian Elischer 	TD_CLR_SUSPENDED(td);
196735c32a76SDavid Xu 	p->p_suspcount--;
196871fad9fdSJulian Elischer 	setrunnable(td);
196935c32a76SDavid Xu }
197035c32a76SDavid Xu 
197144990b8cSJulian Elischer /*
197244990b8cSJulian Elischer  * Allow all threads blocked by single threading to continue running.
197344990b8cSJulian Elischer  */
197444990b8cSJulian Elischer void
197544990b8cSJulian Elischer thread_unsuspend(struct proc *p)
197644990b8cSJulian Elischer {
197744990b8cSJulian Elischer 	struct thread *td;
197844990b8cSJulian Elischer 
1979b6d5995eSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
198044990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
198144990b8cSJulian Elischer 	if (!P_SHOULDSTOP(p)) {
198244990b8cSJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
198335c32a76SDavid Xu 			thread_unsuspend_one(td);
198444990b8cSJulian Elischer 		}
19851279572aSDavid Xu 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
198644990b8cSJulian Elischer 	    (p->p_numthreads == p->p_suspcount)) {
198744990b8cSJulian Elischer 		/*
198844990b8cSJulian Elischer 		 * Stopping everything also did the job for the single
198944990b8cSJulian Elischer 		 * threading request. Now we've downgraded to single-threaded,
199044990b8cSJulian Elischer 		 * let it continue.
199144990b8cSJulian Elischer 		 */
199235c32a76SDavid Xu 		thread_unsuspend_one(p->p_singlethread);
199344990b8cSJulian Elischer 	}
199444990b8cSJulian Elischer }
199544990b8cSJulian Elischer 
199644990b8cSJulian Elischer void
199744990b8cSJulian Elischer thread_single_end(void)
199844990b8cSJulian Elischer {
199944990b8cSJulian Elischer 	struct thread *td;
200044990b8cSJulian Elischer 	struct proc *p;
200144990b8cSJulian Elischer 
200244990b8cSJulian Elischer 	td = curthread;
200344990b8cSJulian Elischer 	p = td->td_proc;
200444990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
20051279572aSDavid Xu 	p->p_flag &= ~P_STOPPED_SINGLE;
2006112afcb2SJohn Baldwin 	mtx_lock_spin(&sched_lock);
200744990b8cSJulian Elischer 	p->p_singlethread = NULL;
200849539972SJulian Elischer 	/*
200949539972SJulian Elischer 	 * If there are other threads they may now run,
201049539972SJulian Elischer 	 * unless of course there is a blanket 'stop order'
201149539972SJulian Elischer 	 * on the process. The single threader must be allowed
201249539972SJulian Elischer 	 * to continue however as this is a bad place to stop.
201349539972SJulian Elischer 	 */
201449539972SJulian Elischer 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
201549539972SJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
201671fad9fdSJulian Elischer 			thread_unsuspend_one(td);
201744990b8cSJulian Elischer 		}
201849539972SJulian Elischer 	}
2019112afcb2SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
202049539972SJulian Elischer }
202149539972SJulian Elischer 
202244990b8cSJulian Elischer 
2023