xref: /freebsd/sys/kern/kern_thread.c (revision d3a0bd78a8a04b27703078f03877d0e74775afac)
144990b8cSJulian Elischer /*
244990b8cSJulian Elischer  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
344990b8cSJulian Elischer  *  All rights reserved.
444990b8cSJulian Elischer  *
544990b8cSJulian Elischer  * Redistribution and use in source and binary forms, with or without
644990b8cSJulian Elischer  * modification, are permitted provided that the following conditions
744990b8cSJulian Elischer  * are met:
844990b8cSJulian Elischer  * 1. Redistributions of source code must retain the above copyright
944990b8cSJulian Elischer  *    notice(s), this list of conditions and the following disclaimer as
1044990b8cSJulian Elischer  *    the first lines of this file unmodified other than the possible
1144990b8cSJulian Elischer  *    addition of one or more copyright notices.
1244990b8cSJulian Elischer  * 2. Redistributions in binary form must reproduce the above copyright
1344990b8cSJulian Elischer  *    notice(s), this list of conditions and the following disclaimer in the
1444990b8cSJulian Elischer  *    documentation and/or other materials provided with the distribution.
1544990b8cSJulian Elischer  *
1644990b8cSJulian Elischer  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1744990b8cSJulian Elischer  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1844990b8cSJulian Elischer  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1944990b8cSJulian Elischer  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
2044990b8cSJulian Elischer  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2144990b8cSJulian Elischer  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2244990b8cSJulian Elischer  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2344990b8cSJulian Elischer  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2444990b8cSJulian Elischer  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2544990b8cSJulian Elischer  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2644990b8cSJulian Elischer  * DAMAGE.
2744990b8cSJulian Elischer  *
2844990b8cSJulian Elischer  * $FreeBSD$
2944990b8cSJulian Elischer  */
3044990b8cSJulian Elischer 
3144990b8cSJulian Elischer #include <sys/param.h>
3244990b8cSJulian Elischer #include <sys/systm.h>
3344990b8cSJulian Elischer #include <sys/kernel.h>
3444990b8cSJulian Elischer #include <sys/lock.h>
3544990b8cSJulian Elischer #include <sys/malloc.h>
3644990b8cSJulian Elischer #include <sys/mutex.h>
3744990b8cSJulian Elischer #include <sys/proc.h>
38904f1b77SJulian Elischer #include <sys/smp.h>
3944990b8cSJulian Elischer #include <sys/sysctl.h>
405c8329edSJulian Elischer #include <sys/sysproto.h>
4144990b8cSJulian Elischer #include <sys/filedesc.h>
42de028f5aSJeff Roberson #include <sys/sched.h>
4344990b8cSJulian Elischer #include <sys/signalvar.h>
4444990b8cSJulian Elischer #include <sys/sx.h>
45de028f5aSJeff Roberson #include <sys/tty.h>
4644990b8cSJulian Elischer #include <sys/user.h>
4744990b8cSJulian Elischer #include <sys/jail.h>
4844990b8cSJulian Elischer #include <sys/kse.h>
4944990b8cSJulian Elischer #include <sys/ktr.h>
50c76e33b6SJonathan Mini #include <sys/ucontext.h>
5144990b8cSJulian Elischer 
5244990b8cSJulian Elischer #include <vm/vm.h>
5344990b8cSJulian Elischer #include <vm/vm_object.h>
5444990b8cSJulian Elischer #include <vm/pmap.h>
5544990b8cSJulian Elischer #include <vm/uma.h>
5644990b8cSJulian Elischer #include <vm/vm_map.h>
5744990b8cSJulian Elischer 
5802fb42b0SPeter Wemm #include <machine/frame.h>
5902fb42b0SPeter Wemm 
6044990b8cSJulian Elischer /*
614f0db5e0SJulian Elischer  * KSEGRP related storage.
6244990b8cSJulian Elischer  */
634f0db5e0SJulian Elischer static uma_zone_t ksegrp_zone;
644f0db5e0SJulian Elischer static uma_zone_t kse_zone;
6544990b8cSJulian Elischer static uma_zone_t thread_zone;
665215b187SJeff Roberson static uma_zone_t upcall_zone;
6744990b8cSJulian Elischer 
684f0db5e0SJulian Elischer /* DEBUG ONLY */
6944990b8cSJulian Elischer SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
70696058c3SJulian Elischer static int thread_debug = 0;
71696058c3SJulian Elischer SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
72696058c3SJulian Elischer 	&thread_debug, 0, "thread debug");
73fdc5ecd2SDavid Xu 
74fdc5ecd2SDavid Xu static int max_threads_per_proc = 30;
75fdc5ecd2SDavid Xu SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
764f0db5e0SJulian Elischer 	&max_threads_per_proc, 0, "Limit on threads per proc");
774f0db5e0SJulian Elischer 
78fdc5ecd2SDavid Xu static int max_groups_per_proc = 5;
79fdc5ecd2SDavid Xu SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
80fdc5ecd2SDavid Xu 	&max_groups_per_proc, 0, "Limit on thread groups per proc");
81fdc5ecd2SDavid Xu 
820252d203SDavid Xu static int max_threads_hits;
830252d203SDavid Xu SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
840252d203SDavid Xu 	&max_threads_hits, 0, "");
850252d203SDavid Xu 
865215b187SJeff Roberson static int virtual_cpu;
875215b187SJeff Roberson 
/*
 * Number of bytes between the start of member 'first' and the start of
 * member 'last' within 'type'.  Used to size the bzero()/bcopy() of a
 * contiguous run of structure members.
 */
#define RANGEOF(type, first, last)					\
	(offsetof(type, last) - offsetof(type, first))
8944990b8cSJulian Elischer 
905215b187SJeff Roberson TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
915c8329edSJulian Elischer TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
925c8329edSJulian Elischer TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
935215b187SJeff Roberson TAILQ_HEAD(, kse_upcall) zombie_upcalls =
945215b187SJeff Roberson 	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
955215b187SJeff Roberson struct mtx kse_zombie_lock;
965215b187SJeff Roberson MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
9744990b8cSJulian Elischer 
/* Forward declarations of file-local helpers. */
static void	kse_purge(struct proc *p, struct thread *td);
static void	kse_purge_group(struct thread *td);
static void	thread_alloc_spare(struct thread *td, struct thread *spare);
static int	thread_update_usr_ticks(struct thread *td, int user);
1025215b187SJeff Roberson 
1035215b187SJeff Roberson static int
1045215b187SJeff Roberson sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
1055215b187SJeff Roberson {
1065215b187SJeff Roberson 	int error, new_val;
1075215b187SJeff Roberson 	int def_val;
1085215b187SJeff Roberson 
1095215b187SJeff Roberson #ifdef SMP
1105215b187SJeff Roberson 	def_val = mp_ncpus;
1115215b187SJeff Roberson #else
1125215b187SJeff Roberson 	def_val = 1;
1135215b187SJeff Roberson #endif
1145215b187SJeff Roberson 	if (virtual_cpu == 0)
1155215b187SJeff Roberson 		new_val = def_val;
1165215b187SJeff Roberson 	else
1175215b187SJeff Roberson 		new_val = virtual_cpu;
1185215b187SJeff Roberson 	error = sysctl_handle_int(oidp, &new_val, 0, req);
1195215b187SJeff Roberson         if (error != 0 || req->newptr == NULL)
1205215b187SJeff Roberson 		return (error);
1215215b187SJeff Roberson 	if (new_val < 0)
1225215b187SJeff Roberson 		return (EINVAL);
1235215b187SJeff Roberson 	virtual_cpu = new_val;
1245215b187SJeff Roberson 	return (0);
1255215b187SJeff Roberson }
1265215b187SJeff Roberson 
1275215b187SJeff Roberson /* DEBUG ONLY */
1285215b187SJeff Roberson SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
1295215b187SJeff Roberson 	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
1305215b187SJeff Roberson 	"debug virtual cpus");
1315c8329edSJulian Elischer 
13244990b8cSJulian Elischer /*
133696058c3SJulian Elischer  * Prepare a thread for use.
13444990b8cSJulian Elischer  */
13544990b8cSJulian Elischer static void
13644990b8cSJulian Elischer thread_ctor(void *mem, int size, void *arg)
13744990b8cSJulian Elischer {
13844990b8cSJulian Elischer 	struct thread	*td;
13944990b8cSJulian Elischer 
14044990b8cSJulian Elischer 	td = (struct thread *)mem;
14171fad9fdSJulian Elischer 	td->td_state = TDS_INACTIVE;
142060563ecSJulian Elischer 	td->td_oncpu	= NOCPU;
14344990b8cSJulian Elischer }
14444990b8cSJulian Elischer 
/*
 * Reclaim a thread structure after use (destructor-style callback).
 *
 * Nothing is torn down here.  With INVARIANTS compiled in, the thread's
 * state is checked and the kernel panics unless the thread is
 * TDS_INACTIVE.  'size' and 'arg' are not used.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = mem;

#ifdef INVARIANTS
	/* Only an inactive thread may be released. */
	switch (td->td_state) {
	case TDS_INACTIVE:
		break;
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/* Still active in one of these states: must not be unlinked. */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}
17644990b8cSJulian Elischer 
17744990b8cSJulian Elischer /*
17844990b8cSJulian Elischer  * Initialize type-stable parts of a thread (when newly created).
17944990b8cSJulian Elischer  */
18044990b8cSJulian Elischer static void
18144990b8cSJulian Elischer thread_init(void *mem, int size)
18244990b8cSJulian Elischer {
18344990b8cSJulian Elischer 	struct thread	*td;
18444990b8cSJulian Elischer 
18544990b8cSJulian Elischer 	td = (struct thread *)mem;
186e6e24ff9SJulian Elischer 	mtx_lock(&Giant);
187316ec49aSScott Long 	pmap_new_thread(td, 0);
188e6e24ff9SJulian Elischer 	mtx_unlock(&Giant);
18944990b8cSJulian Elischer 	cpu_thread_setup(td);
190de028f5aSJeff Roberson 	td->td_sched = (struct td_sched *)&td[1];
19144990b8cSJulian Elischer }
19244990b8cSJulian Elischer 
/*
 * Tear down the type-stable parts of a thread just before it is
 * discarded: hands the thread to pmap_dispose_thread(), the counterpart
 * of the pmap_new_thread() call in thread_init().  'size' is not used.
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *dead = mem;

	pmap_dispose_thread(dead);
}
2045215b187SJeff Roberson 
205de028f5aSJeff Roberson /*
206de028f5aSJeff Roberson  * Initialize type-stable parts of a kse (when newly created).
207de028f5aSJeff Roberson  */
208de028f5aSJeff Roberson static void
209de028f5aSJeff Roberson kse_init(void *mem, int size)
210de028f5aSJeff Roberson {
211de028f5aSJeff Roberson 	struct kse	*ke;
212de028f5aSJeff Roberson 
213de028f5aSJeff Roberson 	ke = (struct kse *)mem;
214de028f5aSJeff Roberson 	ke->ke_sched = (struct ke_sched *)&ke[1];
215de028f5aSJeff Roberson }
2165215b187SJeff Roberson 
217de028f5aSJeff Roberson /*
218de028f5aSJeff Roberson  * Initialize type-stable parts of a ksegrp (when newly created).
219de028f5aSJeff Roberson  */
220de028f5aSJeff Roberson static void
221de028f5aSJeff Roberson ksegrp_init(void *mem, int size)
222de028f5aSJeff Roberson {
223de028f5aSJeff Roberson 	struct ksegrp	*kg;
224de028f5aSJeff Roberson 
225de028f5aSJeff Roberson 	kg = (struct ksegrp *)mem;
226de028f5aSJeff Roberson 	kg->kg_sched = (struct kg_sched *)&kg[1];
227de028f5aSJeff Roberson }
22844990b8cSJulian Elischer 
22944990b8cSJulian Elischer /*
2305215b187SJeff Roberson  * KSE is linked into kse group.
2315c8329edSJulian Elischer  */
2325c8329edSJulian Elischer void
2335c8329edSJulian Elischer kse_link(struct kse *ke, struct ksegrp *kg)
2345c8329edSJulian Elischer {
2355c8329edSJulian Elischer 	struct proc *p = kg->kg_proc;
2365c8329edSJulian Elischer 
2375c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
2385c8329edSJulian Elischer 	kg->kg_kses++;
2395c8329edSJulian Elischer 	ke->ke_state	= KES_UNQUEUED;
2405c8329edSJulian Elischer 	ke->ke_proc	= p;
2415c8329edSJulian Elischer 	ke->ke_ksegrp	= kg;
2425c8329edSJulian Elischer 	ke->ke_thread	= NULL;
2435c8329edSJulian Elischer 	ke->ke_oncpu	= NOCPU;
2445215b187SJeff Roberson 	ke->ke_flags	= 0;
2455c8329edSJulian Elischer }
2465c8329edSJulian Elischer 
2475c8329edSJulian Elischer void
2485c8329edSJulian Elischer kse_unlink(struct kse *ke)
2495c8329edSJulian Elischer {
2505c8329edSJulian Elischer 	struct ksegrp *kg;
2515c8329edSJulian Elischer 
2525c8329edSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
2535c8329edSJulian Elischer 	kg = ke->ke_ksegrp;
2545c8329edSJulian Elischer 	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
2555215b187SJeff Roberson 	if (ke->ke_state == KES_IDLE) {
2565215b187SJeff Roberson 		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
2575215b187SJeff Roberson 		kg->kg_idle_kses--;
2586f8132a8SJulian Elischer 	}
2595215b187SJeff Roberson 	if (--kg->kg_kses == 0)
2605215b187SJeff Roberson 		ksegrp_unlink(kg);
2615c8329edSJulian Elischer 	/*
2625c8329edSJulian Elischer 	 * Aggregate stats from the KSE
2635c8329edSJulian Elischer 	 */
2645c8329edSJulian Elischer 	kse_stash(ke);
2655c8329edSJulian Elischer }
2665c8329edSJulian Elischer 
2675c8329edSJulian Elischer void
2685c8329edSJulian Elischer ksegrp_link(struct ksegrp *kg, struct proc *p)
2695c8329edSJulian Elischer {
2705c8329edSJulian Elischer 
2715c8329edSJulian Elischer 	TAILQ_INIT(&kg->kg_threads);
2725c8329edSJulian Elischer 	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
2735c8329edSJulian Elischer 	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
2745c8329edSJulian Elischer 	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
2755215b187SJeff Roberson 	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
2765215b187SJeff Roberson 	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structure in ksegrp */
2775c8329edSJulian Elischer 	kg->kg_proc = p;
2785215b187SJeff Roberson 	/*
2795215b187SJeff Roberson 	 * the following counters are in the -zero- section
2805215b187SJeff Roberson 	 * and may not need clearing
2815215b187SJeff Roberson 	 */
2825c8329edSJulian Elischer 	kg->kg_numthreads = 0;
2835c8329edSJulian Elischer 	kg->kg_runnable   = 0;
2845c8329edSJulian Elischer 	kg->kg_kses       = 0;
2855c8329edSJulian Elischer 	kg->kg_runq_kses  = 0; /* XXXKSE change name */
2865215b187SJeff Roberson 	kg->kg_idle_kses  = 0;
2875215b187SJeff Roberson 	kg->kg_numupcalls = 0;
2885c8329edSJulian Elischer 	/* link it in now that it's consistent */
2895c8329edSJulian Elischer 	p->p_numksegrps++;
2905c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
2915c8329edSJulian Elischer }
2925c8329edSJulian Elischer 
2935c8329edSJulian Elischer void
2945c8329edSJulian Elischer ksegrp_unlink(struct ksegrp *kg)
2955c8329edSJulian Elischer {
2965c8329edSJulian Elischer 	struct proc *p;
2975c8329edSJulian Elischer 
2985c8329edSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
2995215b187SJeff Roberson 	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
3005215b187SJeff Roberson 	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
3015215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));
3025215b187SJeff Roberson 
3035c8329edSJulian Elischer 	p = kg->kg_proc;
3045c8329edSJulian Elischer 	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
3055c8329edSJulian Elischer 	p->p_numksegrps--;
3065c8329edSJulian Elischer 	/*
3075c8329edSJulian Elischer 	 * Aggregate stats from the KSE
3085c8329edSJulian Elischer 	 */
3095c8329edSJulian Elischer 	ksegrp_stash(kg);
3105c8329edSJulian Elischer }
3115c8329edSJulian Elischer 
3125215b187SJeff Roberson struct kse_upcall *
3135215b187SJeff Roberson upcall_alloc(void)
3145215b187SJeff Roberson {
3155215b187SJeff Roberson 	struct kse_upcall *ku;
3165215b187SJeff Roberson 
31730621e14SDavid Xu 	ku = uma_zalloc(upcall_zone, M_WAITOK);
3185215b187SJeff Roberson 	bzero(ku, sizeof(*ku));
3195215b187SJeff Roberson 	return (ku);
3205215b187SJeff Roberson }
3215215b187SJeff Roberson 
3225215b187SJeff Roberson void
3235215b187SJeff Roberson upcall_free(struct kse_upcall *ku)
3245215b187SJeff Roberson {
3255215b187SJeff Roberson 
3265215b187SJeff Roberson 	uma_zfree(upcall_zone, ku);
3275215b187SJeff Roberson }
3285215b187SJeff Roberson 
3295215b187SJeff Roberson void
3305215b187SJeff Roberson upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
3315215b187SJeff Roberson {
3325215b187SJeff Roberson 
3335215b187SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
3345215b187SJeff Roberson 	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
3355215b187SJeff Roberson 	ku->ku_ksegrp = kg;
3365215b187SJeff Roberson 	kg->kg_numupcalls++;
3375215b187SJeff Roberson }
3385215b187SJeff Roberson 
3395215b187SJeff Roberson void
3405215b187SJeff Roberson upcall_unlink(struct kse_upcall *ku)
3415215b187SJeff Roberson {
3425215b187SJeff Roberson 	struct ksegrp *kg = ku->ku_ksegrp;
3435215b187SJeff Roberson 
3445215b187SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
3455215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
3465215b187SJeff Roberson 	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
3475215b187SJeff Roberson 	kg->kg_numupcalls--;
3485215b187SJeff Roberson 	upcall_stash(ku);
3495215b187SJeff Roberson }
3505215b187SJeff Roberson 
3515215b187SJeff Roberson void
3525215b187SJeff Roberson upcall_remove(struct thread *td)
3535215b187SJeff Roberson {
3545215b187SJeff Roberson 
3555215b187SJeff Roberson 	if (td->td_upcall) {
3565215b187SJeff Roberson 		td->td_upcall->ku_owner = NULL;
3575215b187SJeff Roberson 		upcall_unlink(td->td_upcall);
3585215b187SJeff Roberson 		td->td_upcall = 0;
3595215b187SJeff Roberson 	}
3605215b187SJeff Roberson }
3615215b187SJeff Roberson 
3625c8329edSJulian Elischer /*
3635215b187SJeff Roberson  * For a newly created process,
3645215b187SJeff Roberson  * link up all the structures and its initial threads etc.
3655c8329edSJulian Elischer  */
3665c8329edSJulian Elischer void
3675c8329edSJulian Elischer proc_linkup(struct proc *p, struct ksegrp *kg,
3685c8329edSJulian Elischer 	    struct kse *ke, struct thread *td)
3695c8329edSJulian Elischer {
3705c8329edSJulian Elischer 
3715c8329edSJulian Elischer 	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
3725c8329edSJulian Elischer 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
3735c8329edSJulian Elischer 	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
3745c8329edSJulian Elischer 	p->p_numksegrps = 0;
3755c8329edSJulian Elischer 	p->p_numthreads = 0;
3765c8329edSJulian Elischer 
3775c8329edSJulian Elischer 	ksegrp_link(kg, p);
3785c8329edSJulian Elischer 	kse_link(ke, kg);
3795c8329edSJulian Elischer 	thread_link(td, kg);
3805c8329edSJulian Elischer }
3815c8329edSJulian Elischer 
3825215b187SJeff Roberson /*
3835215b187SJeff Roberson struct kse_thr_interrupt_args {
3845215b187SJeff Roberson 	struct kse_thr_mailbox * tmbx;
3855215b187SJeff Roberson };
3865215b187SJeff Roberson */
3875c8329edSJulian Elischer int
3885c8329edSJulian Elischer kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
3895c8329edSJulian Elischer {
39034e80e02SDavid Xu 	struct proc *p;
39134e80e02SDavid Xu 	struct thread *td2;
3925c8329edSJulian Elischer 
393adac9400SDavid Xu 	p = td->td_proc;
394ac2e4153SJulian Elischer 	if (!(p->p_flag & P_THREADED) || (uap->tmbx == NULL))
3958db2431fSDavid Xu 		return (EINVAL);
39634e80e02SDavid Xu 	mtx_lock_spin(&sched_lock);
39734e80e02SDavid Xu 	FOREACH_THREAD_IN_PROC(p, td2) {
39834e80e02SDavid Xu 		if (td2->td_mailbox == uap->tmbx) {
39934e80e02SDavid Xu 			td2->td_flags |= TDF_INTERRUPT;
40034e80e02SDavid Xu 			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
40134e80e02SDavid Xu 				if (td2->td_flags & TDF_CVWAITQ)
40234e80e02SDavid Xu 					cv_abort(td2);
40334e80e02SDavid Xu 				else
40434e80e02SDavid Xu 					abortsleep(td2);
40534e80e02SDavid Xu 			}
40634e80e02SDavid Xu 			mtx_unlock_spin(&sched_lock);
4077b290dd0SDavid Xu 			return (0);
40834e80e02SDavid Xu 		}
40934e80e02SDavid Xu 	}
41034e80e02SDavid Xu 	mtx_unlock_spin(&sched_lock);
41134e80e02SDavid Xu 	return (ESRCH);
4125c8329edSJulian Elischer }
4135c8329edSJulian Elischer 
4145215b187SJeff Roberson /*
4155215b187SJeff Roberson struct kse_exit_args {
4165215b187SJeff Roberson 	register_t dummy;
4175215b187SJeff Roberson };
4185215b187SJeff Roberson */
4195c8329edSJulian Elischer int
4205c8329edSJulian Elischer kse_exit(struct thread *td, struct kse_exit_args *uap)
4215c8329edSJulian Elischer {
4225c8329edSJulian Elischer 	struct proc *p;
4235c8329edSJulian Elischer 	struct ksegrp *kg;
424450c38d0SDavid Xu 	struct kse *ke;
4255c8329edSJulian Elischer 
4265c8329edSJulian Elischer 	p = td->td_proc;
4275215b187SJeff Roberson 	/*
4285215b187SJeff Roberson 	 * Only UTS can call the syscall and current group
4295215b187SJeff Roberson 	 * should be a threaded group.
4305215b187SJeff Roberson 	 */
4315215b187SJeff Roberson 	if ((td->td_mailbox != NULL) || (td->td_ksegrp->kg_numupcalls == 0))
4327b290dd0SDavid Xu 		return (EINVAL);
4335215b187SJeff Roberson 	KASSERT((td->td_upcall != NULL), ("%s: not own an upcall", __func__));
4345215b187SJeff Roberson 
4355c8329edSJulian Elischer 	kg = td->td_ksegrp;
4365215b187SJeff Roberson 	/* Serialize removing upcall */
4375c8329edSJulian Elischer 	PROC_LOCK(p);
4385c8329edSJulian Elischer 	mtx_lock_spin(&sched_lock);
4395215b187SJeff Roberson 	if ((kg->kg_numupcalls == 1) && (kg->kg_numthreads > 1)) {
4405c8329edSJulian Elischer 		mtx_unlock_spin(&sched_lock);
4415c8329edSJulian Elischer 		PROC_UNLOCK(p);
4425c8329edSJulian Elischer 		return (EDEADLK);
4435c8329edSJulian Elischer 	}
444450c38d0SDavid Xu 	ke = td->td_kse;
4455215b187SJeff Roberson 	upcall_remove(td);
446450c38d0SDavid Xu 	if (p->p_numthreads == 1) {
4475215b187SJeff Roberson 		kse_purge(p, td);
448ac2e4153SJulian Elischer 		p->p_flag &= ~P_THREADED;
4495c8329edSJulian Elischer 		mtx_unlock_spin(&sched_lock);
4505c8329edSJulian Elischer 		PROC_UNLOCK(p);
4515c8329edSJulian Elischer 	} else {
4525215b187SJeff Roberson 		if (kg->kg_numthreads == 1) { /* Shutdown a group */
4535215b187SJeff Roberson 			kse_purge_group(td);
454450c38d0SDavid Xu 			ke->ke_flags |= KEF_EXIT;
4555215b187SJeff Roberson 		}
456e574e444SDavid Xu 		thread_stopped(p);
4575c8329edSJulian Elischer 		thread_exit();
4585c8329edSJulian Elischer 		/* NOTREACHED */
4595c8329edSJulian Elischer 	}
4607b290dd0SDavid Xu 	return (0);
4615c8329edSJulian Elischer }
4625c8329edSJulian Elischer 
463696058c3SJulian Elischer /*
46493a7aa79SJulian Elischer  * Either becomes an upcall or waits for an awakening event and
4655215b187SJeff Roberson  * then becomes an upcall. Only error cases return.
4665215b187SJeff Roberson  */
4675215b187SJeff Roberson /*
4685215b187SJeff Roberson struct kse_release_args {
469eb117d5cSDavid Xu 	struct timespec *timeout;
4705215b187SJeff Roberson };
471696058c3SJulian Elischer */
4725c8329edSJulian Elischer int
4735c8329edSJulian Elischer kse_release(struct thread *td, struct kse_release_args *uap)
4745c8329edSJulian Elischer {
4755c8329edSJulian Elischer 	struct proc *p;
476696058c3SJulian Elischer 	struct ksegrp *kg;
477eb117d5cSDavid Xu 	struct timespec ts, ts2, ts3, timeout;
478eb117d5cSDavid Xu 	struct timeval tv;
479eb117d5cSDavid Xu 	int error;
4805c8329edSJulian Elischer 
4815c8329edSJulian Elischer 	p = td->td_proc;
482696058c3SJulian Elischer 	kg = td->td_ksegrp;
483ca161eb6SDavid Xu 	/*
4845215b187SJeff Roberson 	 * Only UTS can call the syscall and current group
4855215b187SJeff Roberson 	 * should be a threaded group.
486ca161eb6SDavid Xu 	 */
4875215b187SJeff Roberson 	if ((td->td_mailbox != NULL) || (td->td_ksegrp->kg_numupcalls == 0))
4885c8329edSJulian Elischer 		return (EINVAL);
4895215b187SJeff Roberson 	KASSERT((td->td_upcall != NULL), ("%s: not own an upcall", __func__));
490eb117d5cSDavid Xu 	if (uap->timeout != NULL) {
491eb117d5cSDavid Xu 		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
492eb117d5cSDavid Xu 			return (error);
493eb117d5cSDavid Xu 		getnanouptime(&ts);
494eb117d5cSDavid Xu 		timespecadd(&ts, &timeout);
495eb117d5cSDavid Xu 		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
496eb117d5cSDavid Xu 	}
49703ea4720SJulian Elischer 	mtx_lock_spin(&sched_lock);
49893a7aa79SJulian Elischer 	/* Change OURSELF to become an upcall. */
4995215b187SJeff Roberson 	td->td_flags = TDF_UPCALLING;
5004093529dSJeff Roberson #if 0	/* XXX This shouldn't be necessary */
50188aba94cSDavid Xu 	if (p->p_sflag & PS_NEEDSIGCHK)
50288aba94cSDavid Xu 		td->td_flags |= TDF_ASTPENDING;
5034093529dSJeff Roberson #endif
504eb117d5cSDavid Xu 	mtx_unlock_spin(&sched_lock);
505eb117d5cSDavid Xu 	PROC_LOCK(p);
506eb117d5cSDavid Xu 	while ((td->td_upcall->ku_flags & KUF_DOUPCALL) == 0 &&
50703ea4720SJulian Elischer 	       (kg->kg_completed == NULL)) {
5085215b187SJeff Roberson 		kg->kg_upsleeps++;
509eb117d5cSDavid Xu 		error = msleep(&kg->kg_completed, &p->p_mtx, PPAUSE|PCATCH,
510eb117d5cSDavid Xu 			"kse_rel", (uap->timeout ? tvtohz(&tv) : 0));
5115215b187SJeff Roberson 		kg->kg_upsleeps--;
5125215b187SJeff Roberson 		PROC_UNLOCK(p);
513eb117d5cSDavid Xu 		if (uap->timeout == NULL || error != EWOULDBLOCK)
514eb117d5cSDavid Xu 			return (0);
515eb117d5cSDavid Xu 		getnanouptime(&ts2);
516eb117d5cSDavid Xu 		if (timespeccmp(&ts2, &ts, >=))
517eb117d5cSDavid Xu 			return (0);
518eb117d5cSDavid Xu 		ts3 = ts;
519eb117d5cSDavid Xu 		timespecsub(&ts3, &ts2);
520eb117d5cSDavid Xu 		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
521eb117d5cSDavid Xu 		PROC_LOCK(p);
52293a7aa79SJulian Elischer 	}
523eb117d5cSDavid Xu 	PROC_UNLOCK(p);
524696058c3SJulian Elischer 	return (0);
5255c8329edSJulian Elischer }
5265c8329edSJulian Elischer 
5275c8329edSJulian Elischer /* struct kse_wakeup_args {
5285c8329edSJulian Elischer 	struct kse_mailbox *mbx;
5295c8329edSJulian Elischer }; */
5305c8329edSJulian Elischer int
5315c8329edSJulian Elischer kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
5325c8329edSJulian Elischer {
5335c8329edSJulian Elischer 	struct proc *p;
5345c8329edSJulian Elischer 	struct ksegrp *kg;
5355215b187SJeff Roberson 	struct kse_upcall *ku;
53693a7aa79SJulian Elischer 	struct thread *td2;
5375c8329edSJulian Elischer 
5385c8329edSJulian Elischer 	p = td->td_proc;
53993a7aa79SJulian Elischer 	td2 = NULL;
5405215b187SJeff Roberson 	ku = NULL;
5415c8329edSJulian Elischer 	/* KSE-enabled processes only, please. */
542ac2e4153SJulian Elischer 	if (!(p->p_flag & P_THREADED))
5435215b187SJeff Roberson 		return (EINVAL);
5445215b187SJeff Roberson 	PROC_LOCK(p);
54503ea4720SJulian Elischer 	mtx_lock_spin(&sched_lock);
5465c8329edSJulian Elischer 	if (uap->mbx) {
5475c8329edSJulian Elischer 		FOREACH_KSEGRP_IN_PROC(p, kg) {
5485215b187SJeff Roberson 			FOREACH_UPCALL_IN_GROUP(kg, ku) {
5495215b187SJeff Roberson 				if (ku->ku_mailbox == uap->mbx)
55093a7aa79SJulian Elischer 					break;
55193a7aa79SJulian Elischer 			}
5525215b187SJeff Roberson 			if (ku)
55393a7aa79SJulian Elischer 				break;
5545c8329edSJulian Elischer 		}
5555c8329edSJulian Elischer 	} else {
5565c8329edSJulian Elischer 		kg = td->td_ksegrp;
5575215b187SJeff Roberson 		if (kg->kg_upsleeps) {
5585215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
5595215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
5605215b187SJeff Roberson 			PROC_UNLOCK(p);
5615215b187SJeff Roberson 			return (0);
5625c8329edSJulian Elischer 		}
5635215b187SJeff Roberson 		ku = TAILQ_FIRST(&kg->kg_upcalls);
5645c8329edSJulian Elischer 	}
5655215b187SJeff Roberson 	if (ku) {
5665215b187SJeff Roberson 		if ((td2 = ku->ku_owner) == NULL) {
5675215b187SJeff Roberson 			panic("%s: no owner", __func__);
5685215b187SJeff Roberson 		} else if (TD_ON_SLEEPQ(td2) &&
5695215b187SJeff Roberson 		           (td2->td_wchan == &kg->kg_completed)) {
5705215b187SJeff Roberson 			abortsleep(td2);
5715215b187SJeff Roberson 		} else {
5725215b187SJeff Roberson 			ku->ku_flags |= KUF_DOUPCALL;
57303ea4720SJulian Elischer 		}
5745c8329edSJulian Elischer 		mtx_unlock_spin(&sched_lock);
5755215b187SJeff Roberson 		PROC_UNLOCK(p);
5767b290dd0SDavid Xu 		return (0);
5775c8329edSJulian Elischer 	}
57893a7aa79SJulian Elischer 	mtx_unlock_spin(&sched_lock);
5795215b187SJeff Roberson 	PROC_UNLOCK(p);
58093a7aa79SJulian Elischer 	return (ESRCH);
58193a7aa79SJulian Elischer }
5825c8329edSJulian Elischer 
/*
 * If no new KSE group is requested and this is the first call, use the
 * current KSE and do not schedule an upcall.
 * In all other situations, allocate the maximum number of new KSEs and
 * schedule an upcall.
 */
5875c8329edSJulian Elischer /* struct kse_create_args {
5885c8329edSJulian Elischer 	struct kse_mailbox *mbx;
5895c8329edSJulian Elischer 	int newgroup;
5905c8329edSJulian Elischer }; */
5915c8329edSJulian Elischer int
5925c8329edSJulian Elischer kse_create(struct thread *td, struct kse_create_args *uap)
5935c8329edSJulian Elischer {
5945c8329edSJulian Elischer 	struct kse *newke;
5955c8329edSJulian Elischer 	struct ksegrp *newkg;
5965c8329edSJulian Elischer 	struct ksegrp *kg;
5975c8329edSJulian Elischer 	struct proc *p;
5985c8329edSJulian Elischer 	struct kse_mailbox mbx;
5995215b187SJeff Roberson 	struct kse_upcall *newku;
6005215b187SJeff Roberson 	int err, ncpus;
6015c8329edSJulian Elischer 
6025c8329edSJulian Elischer 	p = td->td_proc;
6035c8329edSJulian Elischer 	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
6045c8329edSJulian Elischer 		return (err);
6055c8329edSJulian Elischer 
6065215b187SJeff Roberson 	/* Too bad, why hasn't kernel always a cpu counter !? */
6075215b187SJeff Roberson #ifdef SMP
6085215b187SJeff Roberson 	ncpus = mp_ncpus;
6095215b187SJeff Roberson #else
6105215b187SJeff Roberson 	ncpus = 1;
6115215b187SJeff Roberson #endif
6125215b187SJeff Roberson 	if (thread_debug && virtual_cpu != 0)
6135215b187SJeff Roberson 		ncpus = virtual_cpu;
6145215b187SJeff Roberson 
6155215b187SJeff Roberson 	/* Easier to just set it than to test and set */
616661db6daSDavid Xu 	PROC_LOCK(p);
617ac2e4153SJulian Elischer 	p->p_flag |= P_THREADED;
618661db6daSDavid Xu 	PROC_UNLOCK(p);
6195c8329edSJulian Elischer 	kg = td->td_ksegrp;
6205c8329edSJulian Elischer 	if (uap->newgroup) {
6215215b187SJeff Roberson 		/* Have race condition but it is cheap */
622fdc5ecd2SDavid Xu 		if (p->p_numksegrps >= max_groups_per_proc)
623fdc5ecd2SDavid Xu 			return (EPROCLIM);
6245c8329edSJulian Elischer 		/*
6255c8329edSJulian Elischer 		 * If we want a new KSEGRP it doesn't matter whether
6265c8329edSJulian Elischer 		 * we have already fired up KSE mode before or not.
6275215b187SJeff Roberson 		 * We put the process in KSE mode and create a new KSEGRP.
6285c8329edSJulian Elischer 		 */
6295c8329edSJulian Elischer 		newkg = ksegrp_alloc();
6305c8329edSJulian Elischer 		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
6315c8329edSJulian Elischer 		      kg_startzero, kg_endzero));
6325c8329edSJulian Elischer 		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
6335c8329edSJulian Elischer 		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
6345215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
6355215b187SJeff Roberson 		if (p->p_numksegrps >= max_groups_per_proc) {
6365215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
6379948c47fSDavid Xu 			ksegrp_free(newkg);
6386f8132a8SJulian Elischer 			return (EPROCLIM);
6396f8132a8SJulian Elischer 		}
6409948c47fSDavid Xu 		ksegrp_link(newkg, p);
6415215b187SJeff Roberson 		mtx_unlock_spin(&sched_lock);
6426f8132a8SJulian Elischer 	} else {
6435215b187SJeff Roberson 		newkg = kg;
6446f8132a8SJulian Elischer 	}
6455215b187SJeff Roberson 
6465215b187SJeff Roberson 	/*
6475215b187SJeff Roberson 	 * Creating upcalls more than number of physical cpu does
6485215b187SJeff Roberson 	 * not help performance.
6495215b187SJeff Roberson 	 */
6505215b187SJeff Roberson 	if (newkg->kg_numupcalls >= ncpus)
6515215b187SJeff Roberson 		return (EPROCLIM);
6525215b187SJeff Roberson 
6535215b187SJeff Roberson 	if (newkg->kg_numupcalls == 0) {
6545215b187SJeff Roberson 		/*
6555215b187SJeff Roberson 		 * Initialize KSE group, optimized for MP.
6565215b187SJeff Roberson 		 * Create KSEs as many as physical cpus, this increases
6575215b187SJeff Roberson 		 * concurrent even if userland is not MP safe and can only run
6585215b187SJeff Roberson 		 * on single CPU (for early version of libpthread, it is true).
6595215b187SJeff Roberson 		 * In ideal world, every physical cpu should execute a thread.
6605215b187SJeff Roberson 		 * If there is enough KSEs, threads in kernel can be
6615215b187SJeff Roberson 		 * executed parallel on different cpus with full speed,
6625215b187SJeff Roberson 		 * Concurrent in kernel shouldn't be restricted by number of
6635215b187SJeff Roberson 		 * upcalls userland provides.
6645215b187SJeff Roberson 		 * Adding more upcall structures only increases concurrent
6655215b187SJeff Roberson 		 * in userland.
6665215b187SJeff Roberson 		 * Highest performance configuration is:
6675215b187SJeff Roberson 		 * N kses = N upcalls = N phyiscal cpus
6685215b187SJeff Roberson 		 */
6695215b187SJeff Roberson 		while (newkg->kg_kses < ncpus) {
6705215b187SJeff Roberson 			newke = kse_alloc();
6715c8329edSJulian Elischer 			bzero(&newke->ke_startzero, RANGEOF(struct kse,
6725c8329edSJulian Elischer 			      ke_startzero, ke_endzero));
6735c8329edSJulian Elischer #if 0
6745215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
6755c8329edSJulian Elischer 			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
6765c8329edSJulian Elischer 			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
6776f8132a8SJulian Elischer 			mtx_unlock_spin(&sched_lock);
6785215b187SJeff Roberson #endif
6795215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
6805c8329edSJulian Elischer 			kse_link(newke, newkg);
6815215b187SJeff Roberson 			/* Add engine */
6825215b187SJeff Roberson 			kse_reassign(newke);
6835c8329edSJulian Elischer 			mtx_unlock_spin(&sched_lock);
6845215b187SJeff Roberson 		}
6855215b187SJeff Roberson 	}
6865215b187SJeff Roberson 	newku = upcall_alloc();
6875215b187SJeff Roberson 	newku->ku_mailbox = uap->mbx;
6885215b187SJeff Roberson 	newku->ku_func = mbx.km_func;
6895215b187SJeff Roberson 	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));
6905215b187SJeff Roberson 
6915215b187SJeff Roberson 	/* For the first call this may not have been set */
6925215b187SJeff Roberson 	if (td->td_standin == NULL)
6935215b187SJeff Roberson 		thread_alloc_spare(td, NULL);
6945215b187SJeff Roberson 
6955215b187SJeff Roberson 	mtx_lock_spin(&sched_lock);
6965215b187SJeff Roberson 	if (newkg->kg_numupcalls >= ncpus) {
6975215b187SJeff Roberson 		mtx_unlock_spin(&sched_lock);
6983b3df40fSDavid Xu 		upcall_free(newku);
6995215b187SJeff Roberson 		return (EPROCLIM);
7005215b187SJeff Roberson 	}
7015215b187SJeff Roberson 	upcall_link(newku, newkg);
7026ce75196SDavid Xu 	if (mbx.km_quantum)
7036ce75196SDavid Xu 		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);
7045215b187SJeff Roberson 
7055215b187SJeff Roberson 	/*
7065215b187SJeff Roberson 	 * Each upcall structure has an owner thread, find which
7075215b187SJeff Roberson 	 * one owns it.
7085215b187SJeff Roberson 	 */
7095215b187SJeff Roberson 	if (uap->newgroup) {
7105215b187SJeff Roberson 		/*
7115215b187SJeff Roberson 		 * Because new ksegrp hasn't thread,
7125215b187SJeff Roberson 		 * create an initial upcall thread to own it.
7135215b187SJeff Roberson 		 */
7145215b187SJeff Roberson 		thread_schedule_upcall(td, newku);
7155c8329edSJulian Elischer 	} else {
7165c8329edSJulian Elischer 		/*
7175215b187SJeff Roberson 		 * If current thread hasn't an upcall structure,
7185215b187SJeff Roberson 		 * just assign the upcall to it.
7195c8329edSJulian Elischer 		 */
7205215b187SJeff Roberson 		if (td->td_upcall == NULL) {
7215215b187SJeff Roberson 			newku->ku_owner = td;
7225215b187SJeff Roberson 			td->td_upcall = newku;
7235215b187SJeff Roberson 		} else {
7245c8329edSJulian Elischer 			/*
7255215b187SJeff Roberson 			 * Create a new upcall thread to own it.
7265c8329edSJulian Elischer 			 */
7275215b187SJeff Roberson 			thread_schedule_upcall(td, newku);
7285215b187SJeff Roberson 		}
7295215b187SJeff Roberson 	}
7305215b187SJeff Roberson 	mtx_unlock_spin(&sched_lock);
7315c8329edSJulian Elischer 	return (0);
7325c8329edSJulian Elischer }
7335c8329edSJulian Elischer 
7345c8329edSJulian Elischer /*
735c76e33b6SJonathan Mini  * Fill a ucontext_t with a thread's context information.
736c76e33b6SJonathan Mini  *
737c76e33b6SJonathan Mini  * This is an analogue to getcontext(3).
738c76e33b6SJonathan Mini  */
739c76e33b6SJonathan Mini void
740c76e33b6SJonathan Mini thread_getcontext(struct thread *td, ucontext_t *uc)
741c76e33b6SJonathan Mini {
742c76e33b6SJonathan Mini 
743acaa1566SPeter Wemm /*
744acaa1566SPeter Wemm  * XXX this is declared in a MD include file, i386/include/ucontext.h but
745acaa1566SPeter Wemm  * is used in MI code.
746acaa1566SPeter Wemm  */
7471e19df33SPeter Wemm #ifdef __i386__
748c76e33b6SJonathan Mini 	get_mcontext(td, &uc->uc_mcontext);
7491e19df33SPeter Wemm #endif
75094df4b85SJohn Baldwin 	PROC_LOCK(td->td_proc);
7514093529dSJeff Roberson 	uc->uc_sigmask = td->td_sigmask;
75294df4b85SJohn Baldwin 	PROC_UNLOCK(td->td_proc);
753c76e33b6SJonathan Mini }
754c76e33b6SJonathan Mini 
755c76e33b6SJonathan Mini /*
756c76e33b6SJonathan Mini  * Set a thread's context from a ucontext_t.
757c76e33b6SJonathan Mini  *
758c76e33b6SJonathan Mini  * This is an analogue to setcontext(3).
759c76e33b6SJonathan Mini  */
760c76e33b6SJonathan Mini int
761c76e33b6SJonathan Mini thread_setcontext(struct thread *td, ucontext_t *uc)
762c76e33b6SJonathan Mini {
763c76e33b6SJonathan Mini 	int ret;
764c76e33b6SJonathan Mini 
765acaa1566SPeter Wemm /*
766acaa1566SPeter Wemm  * XXX this is declared in a MD include file, i386/include/ucontext.h but
767acaa1566SPeter Wemm  * is used in MI code.
768acaa1566SPeter Wemm  */
7691e19df33SPeter Wemm #ifdef __i386__
770c76e33b6SJonathan Mini 	ret = set_mcontext(td, &uc->uc_mcontext);
7711e19df33SPeter Wemm #else
7721e19df33SPeter Wemm 	ret = ENOSYS;
7731e19df33SPeter Wemm #endif
774c76e33b6SJonathan Mini 	if (ret == 0) {
775c76e33b6SJonathan Mini 		SIG_CANTMASK(uc->uc_sigmask);
776c76e33b6SJonathan Mini 		PROC_LOCK(td->td_proc);
7774093529dSJeff Roberson 		td->td_sigmask = uc->uc_sigmask;
778c76e33b6SJonathan Mini 		PROC_UNLOCK(td->td_proc);
779c76e33b6SJonathan Mini 	}
780c76e33b6SJonathan Mini 	return (ret);
781c76e33b6SJonathan Mini }
782c76e33b6SJonathan Mini 
783c76e33b6SJonathan Mini /*
78444990b8cSJulian Elischer  * Initialize global thread allocation resources.
78544990b8cSJulian Elischer  */
78644990b8cSJulian Elischer void
78744990b8cSJulian Elischer threadinit(void)
78844990b8cSJulian Elischer {
78944990b8cSJulian Elischer 
790c281972eSPeter Wemm #ifndef __ia64__
791de028f5aSJeff Roberson 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
79244990b8cSJulian Elischer 	    thread_ctor, thread_dtor, thread_init, thread_fini,
79344990b8cSJulian Elischer 	    UMA_ALIGN_CACHE, 0);
794c281972eSPeter Wemm #else
795c281972eSPeter Wemm 	/*
796c281972eSPeter Wemm 	 * XXX the ia64 kstack allocator is really lame and is at the mercy
797c281972eSPeter Wemm 	 * of contigmallloc().  This hackery is to pre-construct a whole
798c281972eSPeter Wemm 	 * pile of thread structures with associated kernel stacks early
799c281972eSPeter Wemm 	 * in the system startup while contigmalloc() still works. Once we
800c281972eSPeter Wemm 	 * have them, keep them.  Sigh.
801c281972eSPeter Wemm 	 */
802de028f5aSJeff Roberson 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
803c281972eSPeter Wemm 	    thread_ctor, thread_dtor, thread_init, thread_fini,
804c281972eSPeter Wemm 	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
805c281972eSPeter Wemm 	uma_prealloc(thread_zone, 512);		/* XXX arbitary */
806c281972eSPeter Wemm #endif
807de028f5aSJeff Roberson 	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
808de028f5aSJeff Roberson 	    NULL, NULL, ksegrp_init, NULL,
8094f0db5e0SJulian Elischer 	    UMA_ALIGN_CACHE, 0);
810de028f5aSJeff Roberson 	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
811de028f5aSJeff Roberson 	    NULL, NULL, kse_init, NULL,
8124f0db5e0SJulian Elischer 	    UMA_ALIGN_CACHE, 0);
8135215b187SJeff Roberson 	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
8145215b187SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
81544990b8cSJulian Elischer }
81644990b8cSJulian Elischer 
81744990b8cSJulian Elischer /*
8181faf202eSJulian Elischer  * Stash an embarasingly extra thread into the zombie thread queue.
81944990b8cSJulian Elischer  */
82044990b8cSJulian Elischer void
82144990b8cSJulian Elischer thread_stash(struct thread *td)
82244990b8cSJulian Elischer {
8235215b187SJeff Roberson 	mtx_lock_spin(&kse_zombie_lock);
82444990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
8255215b187SJeff Roberson 	mtx_unlock_spin(&kse_zombie_lock);
82644990b8cSJulian Elischer }
82744990b8cSJulian Elischer 
82844990b8cSJulian Elischer /*
8295c8329edSJulian Elischer  * Stash an embarasingly extra kse into the zombie kse queue.
8305c8329edSJulian Elischer  */
8315c8329edSJulian Elischer void
8325c8329edSJulian Elischer kse_stash(struct kse *ke)
8335c8329edSJulian Elischer {
8345215b187SJeff Roberson 	mtx_lock_spin(&kse_zombie_lock);
8355c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
8365215b187SJeff Roberson 	mtx_unlock_spin(&kse_zombie_lock);
8375215b187SJeff Roberson }
8385215b187SJeff Roberson 
8395215b187SJeff Roberson /*
8405215b187SJeff Roberson  * Stash an embarasingly extra upcall into the zombie upcall queue.
8415215b187SJeff Roberson  */
8425215b187SJeff Roberson 
8435215b187SJeff Roberson void
8445215b187SJeff Roberson upcall_stash(struct kse_upcall *ku)
8455215b187SJeff Roberson {
8465215b187SJeff Roberson 	mtx_lock_spin(&kse_zombie_lock);
8475215b187SJeff Roberson 	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
8485215b187SJeff Roberson 	mtx_unlock_spin(&kse_zombie_lock);
8495c8329edSJulian Elischer }
8505c8329edSJulian Elischer 
8515c8329edSJulian Elischer /*
8525c8329edSJulian Elischer  * Stash an embarasingly extra ksegrp into the zombie ksegrp queue.
8535c8329edSJulian Elischer  */
8545c8329edSJulian Elischer void
8555c8329edSJulian Elischer ksegrp_stash(struct ksegrp *kg)
8565c8329edSJulian Elischer {
8575215b187SJeff Roberson 	mtx_lock_spin(&kse_zombie_lock);
8585c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
8595215b187SJeff Roberson 	mtx_unlock_spin(&kse_zombie_lock);
8605c8329edSJulian Elischer }
8615c8329edSJulian Elischer 
8625c8329edSJulian Elischer /*
8635215b187SJeff Roberson  * Reap zombie kse resource.
86444990b8cSJulian Elischer  */
86544990b8cSJulian Elischer void
86644990b8cSJulian Elischer thread_reap(void)
86744990b8cSJulian Elischer {
8685c8329edSJulian Elischer 	struct thread *td_first, *td_next;
8695c8329edSJulian Elischer 	struct kse *ke_first, *ke_next;
8705c8329edSJulian Elischer 	struct ksegrp *kg_first, * kg_next;
8715215b187SJeff Roberson 	struct kse_upcall *ku_first, *ku_next;
87244990b8cSJulian Elischer 
87344990b8cSJulian Elischer 	/*
8745215b187SJeff Roberson 	 * Don't even bother to lock if none at this instant,
8755215b187SJeff Roberson 	 * we really don't care about the next instant..
87644990b8cSJulian Elischer 	 */
8775c8329edSJulian Elischer 	if ((!TAILQ_EMPTY(&zombie_threads))
8785c8329edSJulian Elischer 	    || (!TAILQ_EMPTY(&zombie_kses))
8795215b187SJeff Roberson 	    || (!TAILQ_EMPTY(&zombie_ksegrps))
8805215b187SJeff Roberson 	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
8815215b187SJeff Roberson 		mtx_lock_spin(&kse_zombie_lock);
8825c8329edSJulian Elischer 		td_first = TAILQ_FIRST(&zombie_threads);
8835c8329edSJulian Elischer 		ke_first = TAILQ_FIRST(&zombie_kses);
8845c8329edSJulian Elischer 		kg_first = TAILQ_FIRST(&zombie_ksegrps);
8855215b187SJeff Roberson 		ku_first = TAILQ_FIRST(&zombie_upcalls);
8865c8329edSJulian Elischer 		if (td_first)
8875c8329edSJulian Elischer 			TAILQ_INIT(&zombie_threads);
8885c8329edSJulian Elischer 		if (ke_first)
8895c8329edSJulian Elischer 			TAILQ_INIT(&zombie_kses);
8905c8329edSJulian Elischer 		if (kg_first)
8915c8329edSJulian Elischer 			TAILQ_INIT(&zombie_ksegrps);
8925215b187SJeff Roberson 		if (ku_first)
8935215b187SJeff Roberson 			TAILQ_INIT(&zombie_upcalls);
8945215b187SJeff Roberson 		mtx_unlock_spin(&kse_zombie_lock);
8955c8329edSJulian Elischer 		while (td_first) {
8965c8329edSJulian Elischer 			td_next = TAILQ_NEXT(td_first, td_runq);
8975215b187SJeff Roberson 			if (td_first->td_ucred)
8985215b187SJeff Roberson 				crfree(td_first->td_ucred);
8995c8329edSJulian Elischer 			thread_free(td_first);
9005c8329edSJulian Elischer 			td_first = td_next;
90144990b8cSJulian Elischer 		}
9025c8329edSJulian Elischer 		while (ke_first) {
9035c8329edSJulian Elischer 			ke_next = TAILQ_NEXT(ke_first, ke_procq);
9045c8329edSJulian Elischer 			kse_free(ke_first);
9055c8329edSJulian Elischer 			ke_first = ke_next;
9065c8329edSJulian Elischer 		}
9075c8329edSJulian Elischer 		while (kg_first) {
9085c8329edSJulian Elischer 			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
9095c8329edSJulian Elischer 			ksegrp_free(kg_first);
9105c8329edSJulian Elischer 			kg_first = kg_next;
9115c8329edSJulian Elischer 		}
9125215b187SJeff Roberson 		while (ku_first) {
9135215b187SJeff Roberson 			ku_next = TAILQ_NEXT(ku_first, ku_link);
9145215b187SJeff Roberson 			upcall_free(ku_first);
9155215b187SJeff Roberson 			ku_first = ku_next;
9165215b187SJeff Roberson 		}
91744990b8cSJulian Elischer 	}
91844990b8cSJulian Elischer }
91944990b8cSJulian Elischer 
92044990b8cSJulian Elischer /*
9214f0db5e0SJulian Elischer  * Allocate a ksegrp.
9224f0db5e0SJulian Elischer  */
9234f0db5e0SJulian Elischer struct ksegrp *
9244f0db5e0SJulian Elischer ksegrp_alloc(void)
9254f0db5e0SJulian Elischer {
926a163d034SWarner Losh 	return (uma_zalloc(ksegrp_zone, M_WAITOK));
9274f0db5e0SJulian Elischer }
9284f0db5e0SJulian Elischer 
9294f0db5e0SJulian Elischer /*
9304f0db5e0SJulian Elischer  * Allocate a kse.
9314f0db5e0SJulian Elischer  */
9324f0db5e0SJulian Elischer struct kse *
9334f0db5e0SJulian Elischer kse_alloc(void)
9344f0db5e0SJulian Elischer {
935a163d034SWarner Losh 	return (uma_zalloc(kse_zone, M_WAITOK));
9364f0db5e0SJulian Elischer }
9374f0db5e0SJulian Elischer 
9384f0db5e0SJulian Elischer /*
93944990b8cSJulian Elischer  * Allocate a thread.
94044990b8cSJulian Elischer  */
94144990b8cSJulian Elischer struct thread *
94244990b8cSJulian Elischer thread_alloc(void)
94344990b8cSJulian Elischer {
94444990b8cSJulian Elischer 	thread_reap(); /* check if any zombies to get */
945a163d034SWarner Losh 	return (uma_zalloc(thread_zone, M_WAITOK));
94644990b8cSJulian Elischer }
94744990b8cSJulian Elischer 
94844990b8cSJulian Elischer /*
9494f0db5e0SJulian Elischer  * Deallocate a ksegrp.
9504f0db5e0SJulian Elischer  */
9514f0db5e0SJulian Elischer void
9524f0db5e0SJulian Elischer ksegrp_free(struct ksegrp *td)
9534f0db5e0SJulian Elischer {
9544f0db5e0SJulian Elischer 	uma_zfree(ksegrp_zone, td);
9554f0db5e0SJulian Elischer }
9564f0db5e0SJulian Elischer 
9574f0db5e0SJulian Elischer /*
9584f0db5e0SJulian Elischer  * Deallocate a kse.
9594f0db5e0SJulian Elischer  */
9604f0db5e0SJulian Elischer void
9614f0db5e0SJulian Elischer kse_free(struct kse *td)
9624f0db5e0SJulian Elischer {
9634f0db5e0SJulian Elischer 	uma_zfree(kse_zone, td);
9644f0db5e0SJulian Elischer }
9654f0db5e0SJulian Elischer 
9664f0db5e0SJulian Elischer /*
96744990b8cSJulian Elischer  * Deallocate a thread.
96844990b8cSJulian Elischer  */
96944990b8cSJulian Elischer void
97044990b8cSJulian Elischer thread_free(struct thread *td)
97144990b8cSJulian Elischer {
972696058c3SJulian Elischer 
973696058c3SJulian Elischer 	cpu_thread_clean(td);
97444990b8cSJulian Elischer 	uma_zfree(thread_zone, td);
97544990b8cSJulian Elischer }
97644990b8cSJulian Elischer 
97744990b8cSJulian Elischer /*
97844990b8cSJulian Elischer  * Store the thread context in the UTS's mailbox.
9793d0586d4SJulian Elischer  * then add the mailbox at the head of a list we are building in user space.
9803d0586d4SJulian Elischer  * The list is anchored in the ksegrp structure.
98144990b8cSJulian Elischer  */
98244990b8cSJulian Elischer int
98344990b8cSJulian Elischer thread_export_context(struct thread *td)
98444990b8cSJulian Elischer {
9850d294460SJuli Mallett 	struct proc *p;
9863d0586d4SJulian Elischer 	struct ksegrp *kg;
9873d0586d4SJulian Elischer 	uintptr_t mbx;
9883d0586d4SJulian Elischer 	void *addr;
9895215b187SJeff Roberson 	int error,temp;
990c76e33b6SJonathan Mini 	ucontext_t uc;
99144990b8cSJulian Elischer 
9920d294460SJuli Mallett 	p = td->td_proc;
9930d294460SJuli Mallett 	kg = td->td_ksegrp;
9940d294460SJuli Mallett 
995c76e33b6SJonathan Mini 	/* Export the user/machine context. */
9963d0586d4SJulian Elischer 	addr = (void *)(&td->td_mailbox->tm_context);
9973d0586d4SJulian Elischer 	error = copyin(addr, &uc, sizeof(ucontext_t));
99893a7aa79SJulian Elischer 	if (error)
99993a7aa79SJulian Elischer 		goto bad;
100093a7aa79SJulian Elischer 
1001c76e33b6SJonathan Mini 	thread_getcontext(td, &uc);
10023d0586d4SJulian Elischer 	error = copyout(&uc, addr, sizeof(ucontext_t));
100393a7aa79SJulian Elischer 	if (error)
100493a7aa79SJulian Elischer 		goto bad;
100544990b8cSJulian Elischer 
10065215b187SJeff Roberson 	/* Exports clock ticks in kernel mode */
10075215b187SJeff Roberson 	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
10085215b187SJeff Roberson 	temp = fuword(addr) + td->td_usticks;
10095215b187SJeff Roberson 	if (suword(addr, temp))
10105215b187SJeff Roberson 		goto bad;
10115215b187SJeff Roberson 
10125215b187SJeff Roberson 	/* Get address in latest mbox of list pointer */
10133d0586d4SJulian Elischer 	addr = (void *)(&td->td_mailbox->tm_next);
10143d0586d4SJulian Elischer 	/*
10153d0586d4SJulian Elischer 	 * Put the saved address of the previous first
10163d0586d4SJulian Elischer 	 * entry into this one
10173d0586d4SJulian Elischer 	 */
10183d0586d4SJulian Elischer 	for (;;) {
10193d0586d4SJulian Elischer 		mbx = (uintptr_t)kg->kg_completed;
10203d0586d4SJulian Elischer 		if (suword(addr, mbx)) {
102193a7aa79SJulian Elischer 			error = EFAULT;
10228798d4f9SDavid Xu 			goto bad;
10233d0586d4SJulian Elischer 		}
10240cd3964fSJulian Elischer 		PROC_LOCK(p);
10253d0586d4SJulian Elischer 		if (mbx == (uintptr_t)kg->kg_completed) {
10263d0586d4SJulian Elischer 			kg->kg_completed = td->td_mailbox;
10275215b187SJeff Roberson 			/*
10285215b187SJeff Roberson 			 * The thread context may be taken away by
10295215b187SJeff Roberson 			 * other upcall threads when we unlock
10305215b187SJeff Roberson 			 * process lock. it's no longer valid to
10315215b187SJeff Roberson 			 * use it again in any other places.
10325215b187SJeff Roberson 			 */
10335215b187SJeff Roberson 			td->td_mailbox = NULL;
10340cd3964fSJulian Elischer 			PROC_UNLOCK(p);
10353d0586d4SJulian Elischer 			break;
10363d0586d4SJulian Elischer 		}
10370cd3964fSJulian Elischer 		PROC_UNLOCK(p);
10383d0586d4SJulian Elischer 	}
10395215b187SJeff Roberson 	td->td_usticks = 0;
10403d0586d4SJulian Elischer 	return (0);
10418798d4f9SDavid Xu 
10428798d4f9SDavid Xu bad:
10438798d4f9SDavid Xu 	PROC_LOCK(p);
10448798d4f9SDavid Xu 	psignal(p, SIGSEGV);
10458798d4f9SDavid Xu 	PROC_UNLOCK(p);
10465215b187SJeff Roberson 	/* The mailbox is bad, don't use it */
10475215b187SJeff Roberson 	td->td_mailbox = NULL;
10485215b187SJeff Roberson 	td->td_usticks = 0;
104993a7aa79SJulian Elischer 	return (error);
10503d0586d4SJulian Elischer }
105144990b8cSJulian Elischer 
10523d0586d4SJulian Elischer /*
10533d0586d4SJulian Elischer  * Take the list of completed mailboxes for this KSEGRP and put them on this
10545215b187SJeff Roberson  * upcall's mailbox as it's the next one going up.
10553d0586d4SJulian Elischer  */
10563d0586d4SJulian Elischer static int
10575215b187SJeff Roberson thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
10583d0586d4SJulian Elischer {
10590cd3964fSJulian Elischer 	struct proc *p = kg->kg_proc;
10603d0586d4SJulian Elischer 	void *addr;
10613d0586d4SJulian Elischer 	uintptr_t mbx;
10623d0586d4SJulian Elischer 
10635215b187SJeff Roberson 	addr = (void *)(&ku->ku_mailbox->km_completed);
10643d0586d4SJulian Elischer 	for (;;) {
10653d0586d4SJulian Elischer 		mbx = (uintptr_t)kg->kg_completed;
10663d0586d4SJulian Elischer 		if (suword(addr, mbx)) {
10670cd3964fSJulian Elischer 			PROC_LOCK(p);
10680cd3964fSJulian Elischer 			psignal(p, SIGSEGV);
10690cd3964fSJulian Elischer 			PROC_UNLOCK(p);
10703d0586d4SJulian Elischer 			return (EFAULT);
10713d0586d4SJulian Elischer 		}
10720cd3964fSJulian Elischer 		PROC_LOCK(p);
10733d0586d4SJulian Elischer 		if (mbx == (uintptr_t)kg->kg_completed) {
10743d0586d4SJulian Elischer 			kg->kg_completed = NULL;
10750cd3964fSJulian Elischer 			PROC_UNLOCK(p);
10763d0586d4SJulian Elischer 			break;
10773d0586d4SJulian Elischer 		}
10780cd3964fSJulian Elischer 		PROC_UNLOCK(p);
10793d0586d4SJulian Elischer 	}
10803d0586d4SJulian Elischer 	return (0);
10813d0586d4SJulian Elischer }
108244990b8cSJulian Elischer 
108344990b8cSJulian Elischer /*
10848798d4f9SDavid Xu  * This function should be called at statclock interrupt time
10858798d4f9SDavid Xu  */
10868798d4f9SDavid Xu int
10875215b187SJeff Roberson thread_statclock(int user)
10888798d4f9SDavid Xu {
10898798d4f9SDavid Xu 	struct thread *td = curthread;
10908798d4f9SDavid Xu 
10915215b187SJeff Roberson 	if (td->td_ksegrp->kg_numupcalls == 0)
10925215b187SJeff Roberson 		return (-1);
10938798d4f9SDavid Xu 	if (user) {
10948798d4f9SDavid Xu 		/* Current always do via ast() */
1095b4508d7dSDavid Xu 		mtx_lock_spin(&sched_lock);
10964a338afdSJulian Elischer 		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
1097b4508d7dSDavid Xu 		mtx_unlock_spin(&sched_lock);
10985215b187SJeff Roberson 		td->td_uuticks++;
10998798d4f9SDavid Xu 	} else {
11008798d4f9SDavid Xu 		if (td->td_mailbox != NULL)
11015215b187SJeff Roberson 			td->td_usticks++;
11025215b187SJeff Roberson 		else {
11035215b187SJeff Roberson 			/* XXXKSE
11045215b187SJeff Roberson 		 	 * We will call thread_user_enter() for every
11055215b187SJeff Roberson 			 * kernel entry in future, so if the thread mailbox
11065215b187SJeff Roberson 			 * is NULL, it must be a UTS kernel, don't account
11075215b187SJeff Roberson 			 * clock ticks for it.
11085215b187SJeff Roberson 			 */
11098798d4f9SDavid Xu 		}
11105215b187SJeff Roberson 	}
11115215b187SJeff Roberson 	return (0);
11128798d4f9SDavid Xu }
11138798d4f9SDavid Xu 
11145215b187SJeff Roberson /*
11154b4866edSDavid Xu  * Export state clock ticks for userland
11165215b187SJeff Roberson  */
11178798d4f9SDavid Xu static int
11184b4866edSDavid Xu thread_update_usr_ticks(struct thread *td, int user)
11198798d4f9SDavid Xu {
11208798d4f9SDavid Xu 	struct proc *p = td->td_proc;
11218798d4f9SDavid Xu 	struct kse_thr_mailbox *tmbx;
11225215b187SJeff Roberson 	struct kse_upcall *ku;
11236ce75196SDavid Xu 	struct ksegrp *kg;
11248798d4f9SDavid Xu 	caddr_t addr;
11255215b187SJeff Roberson 	uint uticks;
11268798d4f9SDavid Xu 
11275215b187SJeff Roberson 	if ((ku = td->td_upcall) == NULL)
11285215b187SJeff Roberson 		return (-1);
11298798d4f9SDavid Xu 
11305215b187SJeff Roberson 	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
11318798d4f9SDavid Xu 	if ((tmbx == NULL) || (tmbx == (void *)-1))
11325215b187SJeff Roberson 		return (-1);
11334b4866edSDavid Xu 	if (user) {
11345215b187SJeff Roberson 		uticks = td->td_uuticks;
11355215b187SJeff Roberson 		td->td_uuticks = 0;
11365215b187SJeff Roberson 		addr = (caddr_t)&tmbx->tm_uticks;
11374b4866edSDavid Xu 	} else {
11384b4866edSDavid Xu 		uticks = td->td_usticks;
11395215b187SJeff Roberson 		td->td_usticks = 0;
11404b4866edSDavid Xu 		addr = (caddr_t)&tmbx->tm_sticks;
11414b4866edSDavid Xu 	}
11424b4866edSDavid Xu 	if (uticks) {
11434b4866edSDavid Xu 		if (suword(addr, uticks+fuword(addr))) {
11445215b187SJeff Roberson 			PROC_LOCK(p);
11455215b187SJeff Roberson 			psignal(p, SIGSEGV);
11465215b187SJeff Roberson 			PROC_UNLOCK(p);
11475215b187SJeff Roberson 			return (-2);
11485215b187SJeff Roberson 		}
11494b4866edSDavid Xu 	}
11506ce75196SDavid Xu 	kg = td->td_ksegrp;
11516ce75196SDavid Xu 	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
11524b4866edSDavid Xu 		mtx_lock_spin(&sched_lock);
11534b4866edSDavid Xu 		td->td_upcall->ku_flags |= KUF_DOUPCALL;
11544b4866edSDavid Xu 		mtx_unlock_spin(&sched_lock);
11554b4866edSDavid Xu 	}
11565215b187SJeff Roberson 	return (0);
11578798d4f9SDavid Xu }
11588798d4f9SDavid Xu 
11598798d4f9SDavid Xu /*
116044990b8cSJulian Elischer  * Discard the current thread and exit from its context.
116144990b8cSJulian Elischer  *
116244990b8cSJulian Elischer  * Because we can't free a thread while we're operating under its context,
1163696058c3SJulian Elischer  * push the current thread into our CPU's deadthread holder. This means
1164696058c3SJulian Elischer  * we needn't worry about someone else grabbing our context before we
1165696058c3SJulian Elischer  * do a cpu_throw().
116644990b8cSJulian Elischer  */
116744990b8cSJulian Elischer void
116844990b8cSJulian Elischer thread_exit(void)
116944990b8cSJulian Elischer {
117044990b8cSJulian Elischer 	struct thread *td;
117144990b8cSJulian Elischer 	struct kse *ke;
117244990b8cSJulian Elischer 	struct proc *p;
117344990b8cSJulian Elischer 	struct ksegrp	*kg;
117444990b8cSJulian Elischer 
117544990b8cSJulian Elischer 	td = curthread;
117644990b8cSJulian Elischer 	kg = td->td_ksegrp;
117744990b8cSJulian Elischer 	p = td->td_proc;
117844990b8cSJulian Elischer 	ke = td->td_kse;
117944990b8cSJulian Elischer 
118044990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
118188151aa3SJulian Elischer 	KASSERT(p != NULL, ("thread exiting without a process"));
118288151aa3SJulian Elischer 	KASSERT(ke != NULL, ("thread exiting without a kse"));
118388151aa3SJulian Elischer 	KASSERT(kg != NULL, ("thread exiting without a kse group"));
118444990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
118544990b8cSJulian Elischer 	CTR1(KTR_PROC, "thread_exit: thread %p", td);
118644990b8cSJulian Elischer 	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));
118744990b8cSJulian Elischer 
118848bfcdddSJulian Elischer 	if (td->td_standin != NULL) {
118948bfcdddSJulian Elischer 		thread_stash(td->td_standin);
119048bfcdddSJulian Elischer 		td->td_standin = NULL;
119148bfcdddSJulian Elischer 	}
119248bfcdddSJulian Elischer 
119344990b8cSJulian Elischer 	cpu_thread_exit(td);	/* XXXSMP */
119444990b8cSJulian Elischer 
11951faf202eSJulian Elischer 	/*
11961faf202eSJulian Elischer 	 * The last thread is left attached to the process
11971faf202eSJulian Elischer 	 * So that the whole bundle gets recycled. Skip
11981faf202eSJulian Elischer 	 * all this stuff.
11991faf202eSJulian Elischer 	 */
12001faf202eSJulian Elischer 	if (p->p_numthreads > 1) {
1201d3a0bd78SJulian Elischer 		thread_unlink(td);
12020252d203SDavid Xu 		if (p->p_maxthrwaits)
12030252d203SDavid Xu 			wakeup(&p->p_numthreads);
120444990b8cSJulian Elischer 		/*
120544990b8cSJulian Elischer 		 * The test below is NOT true if we are the
12061faf202eSJulian Elischer 		 * sole exiting thread. P_STOPPED_SNGL is unset
120744990b8cSJulian Elischer 		 * in exit1() after it is the only survivor.
120844990b8cSJulian Elischer 		 */
12091279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
121044990b8cSJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
121171fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
121244990b8cSJulian Elischer 			}
121344990b8cSJulian Elischer 		}
121448bfcdddSJulian Elischer 
12155215b187SJeff Roberson 		/*
12165215b187SJeff Roberson 		 * Because each upcall structure has an owner thread,
12175215b187SJeff Roberson 		 * owner thread exits only when process is in exiting
12185215b187SJeff Roberson 		 * state, so upcall to userland is no longer needed,
12195215b187SJeff Roberson 		 * deleting upcall structure is safe here.
12205215b187SJeff Roberson 		 * So when all threads in a group is exited, all upcalls
12215215b187SJeff Roberson 		 * in the group should be automatically freed.
12225215b187SJeff Roberson 		 */
12235215b187SJeff Roberson 		if (td->td_upcall)
12245215b187SJeff Roberson 			upcall_remove(td);
12256f8132a8SJulian Elischer 
12265215b187SJeff Roberson 		ke->ke_state = KES_UNQUEUED;
12275215b187SJeff Roberson 		ke->ke_thread = NULL;
122848bfcdddSJulian Elischer 		/*
122993a7aa79SJulian Elischer 		 * Decide what to do with the KSE attached to this thread.
123048bfcdddSJulian Elischer 		 */
12315215b187SJeff Roberson 		if (ke->ke_flags & KEF_EXIT)
12326f8132a8SJulian Elischer 			kse_unlink(ke);
12335215b187SJeff Roberson 		else
12346f8132a8SJulian Elischer 			kse_reassign(ke);
12356f8132a8SJulian Elischer 		PROC_UNLOCK(p);
12365215b187SJeff Roberson 		td->td_kse	= NULL;
12375c8329edSJulian Elischer 		td->td_state	= TDS_INACTIVE;
123836f7b36fSDavid Xu #if 0
12395c8329edSJulian Elischer 		td->td_proc	= NULL;
124036f7b36fSDavid Xu #endif
12415c8329edSJulian Elischer 		td->td_ksegrp	= NULL;
12425c8329edSJulian Elischer 		td->td_last_kse	= NULL;
1243696058c3SJulian Elischer 		PCPU_SET(deadthread, td);
12441faf202eSJulian Elischer 	} else {
12451faf202eSJulian Elischer 		PROC_UNLOCK(p);
12461faf202eSJulian Elischer 	}
12474093529dSJeff Roberson 	/* XXX Shouldn't cpu_throw() here. */
1248cc66ebe2SPeter Wemm 	mtx_assert(&sched_lock, MA_OWNED);
1249cc66ebe2SPeter Wemm #if defined(__i386__) || defined(__sparc64__)
1250cc66ebe2SPeter Wemm 	cpu_throw(td, choosethread());
1251cc66ebe2SPeter Wemm #else
125244990b8cSJulian Elischer 	cpu_throw();
1253cc66ebe2SPeter Wemm #endif
1254cc66ebe2SPeter Wemm 	panic("I'm a teapot!");
125544990b8cSJulian Elischer 	/* NOTREACHED */
125644990b8cSJulian Elischer }
125744990b8cSJulian Elischer 
125844990b8cSJulian Elischer /*
1259696058c3SJulian Elischer  * Do any thread specific cleanups that may be needed in wait()
1260696058c3SJulian Elischer  * called with Giant held, proc and schedlock not held.
1261696058c3SJulian Elischer  */
1262696058c3SJulian Elischer void
1263696058c3SJulian Elischer thread_wait(struct proc *p)
1264696058c3SJulian Elischer {
1265696058c3SJulian Elischer 	struct thread *td;
1266696058c3SJulian Elischer 
1267696058c3SJulian Elischer 	KASSERT((p->p_numthreads == 1), ("Muliple threads in wait1()"));
1268696058c3SJulian Elischer 	KASSERT((p->p_numksegrps == 1), ("Muliple ksegrps in wait1()"));
1269696058c3SJulian Elischer 	FOREACH_THREAD_IN_PROC(p, td) {
1270696058c3SJulian Elischer 		if (td->td_standin != NULL) {
1271696058c3SJulian Elischer 			thread_free(td->td_standin);
1272696058c3SJulian Elischer 			td->td_standin = NULL;
1273696058c3SJulian Elischer 		}
1274696058c3SJulian Elischer 		cpu_thread_clean(td);
1275696058c3SJulian Elischer 	}
1276696058c3SJulian Elischer 	thread_reap();	/* check for zombie threads etc. */
1277696058c3SJulian Elischer }
1278696058c3SJulian Elischer 
1279696058c3SJulian Elischer /*
128044990b8cSJulian Elischer  * Link a thread to a process.
12811faf202eSJulian Elischer  * set up anything that needs to be initialized for it to
12821faf202eSJulian Elischer  * be used by the process.
128344990b8cSJulian Elischer  *
128444990b8cSJulian Elischer  * Note that we do not link to the proc's ucred here.
128544990b8cSJulian Elischer  * The thread is linked as if running but no KSE assigned.
128644990b8cSJulian Elischer  */
128744990b8cSJulian Elischer void
128844990b8cSJulian Elischer thread_link(struct thread *td, struct ksegrp *kg)
128944990b8cSJulian Elischer {
129044990b8cSJulian Elischer 	struct proc *p;
129144990b8cSJulian Elischer 
129244990b8cSJulian Elischer 	p = kg->kg_proc;
129371fad9fdSJulian Elischer 	td->td_state    = TDS_INACTIVE;
129444990b8cSJulian Elischer 	td->td_proc     = p;
129544990b8cSJulian Elischer 	td->td_ksegrp   = kg;
129644990b8cSJulian Elischer 	td->td_last_kse = NULL;
12975215b187SJeff Roberson 	td->td_flags    = 0;
12985215b187SJeff Roberson 	td->td_kse      = NULL;
129944990b8cSJulian Elischer 
13001faf202eSJulian Elischer 	LIST_INIT(&td->td_contested);
13011faf202eSJulian Elischer 	callout_init(&td->td_slpcallout, 1);
130244990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
130344990b8cSJulian Elischer 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
130444990b8cSJulian Elischer 	p->p_numthreads++;
130544990b8cSJulian Elischer 	kg->kg_numthreads++;
130644990b8cSJulian Elischer }
130744990b8cSJulian Elischer 
1308d3a0bd78SJulian Elischer void
1309d3a0bd78SJulian Elischer thread_unlink(struct thread *td)
1310d3a0bd78SJulian Elischer {
1311d3a0bd78SJulian Elischer 	struct proc *p = td->td_proc;
1312d3a0bd78SJulian Elischer 	struct ksegrp *kg = td->td_ksegrp;
1313d3a0bd78SJulian Elischer 
1314d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1315d3a0bd78SJulian Elischer 	p->p_numthreads--;
1316d3a0bd78SJulian Elischer 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1317d3a0bd78SJulian Elischer 	kg->kg_numthreads--;
1318d3a0bd78SJulian Elischer 	/* could clear a few other things here */
1319d3a0bd78SJulian Elischer }
1320d3a0bd78SJulian Elischer 
13215215b187SJeff Roberson /*
13225215b187SJeff Roberson  * Purge a ksegrp resource. When a ksegrp is preparing to
13235215b187SJeff Roberson  * exit, it calls this function.
13245215b187SJeff Roberson  */
13255215b187SJeff Roberson void
13265215b187SJeff Roberson kse_purge_group(struct thread *td)
13275215b187SJeff Roberson {
13285215b187SJeff Roberson 	struct ksegrp *kg;
13295215b187SJeff Roberson 	struct kse *ke;
13305215b187SJeff Roberson 
13315215b187SJeff Roberson 	kg = td->td_ksegrp;
13325215b187SJeff Roberson  	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
13335215b187SJeff Roberson 	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
13345215b187SJeff Roberson 		KASSERT(ke->ke_state == KES_IDLE,
13355215b187SJeff Roberson 			("%s: wrong idle KSE state", __func__));
13365215b187SJeff Roberson 		kse_unlink(ke);
13375215b187SJeff Roberson 	}
13385215b187SJeff Roberson 	KASSERT((kg->kg_kses == 1),
13395215b187SJeff Roberson 		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
13405215b187SJeff Roberson 	KASSERT((kg->kg_numupcalls == 0),
13415215b187SJeff Roberson 	        ("%s: ksegrp still has %d upcall datas",
13425215b187SJeff Roberson 		__func__, kg->kg_numupcalls));
13435215b187SJeff Roberson }
13445215b187SJeff Roberson 
13455215b187SJeff Roberson /*
13465215b187SJeff Roberson  * Purge a process's KSE resource. When a process is preparing to
13475215b187SJeff Roberson  * exit, it calls kse_purge to release any extra KSE resources in
13485215b187SJeff Roberson  * the process.
13495215b187SJeff Roberson  */
13505c8329edSJulian Elischer void
13515c8329edSJulian Elischer kse_purge(struct proc *p, struct thread *td)
13525c8329edSJulian Elischer {
13535c8329edSJulian Elischer 	struct ksegrp *kg;
13545215b187SJeff Roberson 	struct kse *ke;
13555c8329edSJulian Elischer 
13565c8329edSJulian Elischer  	KASSERT(p->p_numthreads == 1, ("bad thread number"));
13575c8329edSJulian Elischer 	mtx_lock_spin(&sched_lock);
13585c8329edSJulian Elischer 	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
13595c8329edSJulian Elischer 		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
13605c8329edSJulian Elischer 		p->p_numksegrps--;
13615215b187SJeff Roberson 		/*
13625215b187SJeff Roberson 		 * There is no ownership for KSE, after all threads
13635215b187SJeff Roberson 		 * in the group exited, it is possible that some KSEs
13645215b187SJeff Roberson 		 * were left in idle queue, gc them now.
13655215b187SJeff Roberson 		 */
13665215b187SJeff Roberson 		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
13675215b187SJeff Roberson 			KASSERT(ke->ke_state == KES_IDLE,
13685215b187SJeff Roberson 			   ("%s: wrong idle KSE state", __func__));
13695215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
13705215b187SJeff Roberson 			kg->kg_idle_kses--;
13715215b187SJeff Roberson 			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
13725215b187SJeff Roberson 			kg->kg_kses--;
13735215b187SJeff Roberson 			kse_stash(ke);
13745215b187SJeff Roberson 		}
13755c8329edSJulian Elischer 		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
13765c8329edSJulian Elischer 		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
13775215b187SJeff Roberson 		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
13785215b187SJeff Roberson 		KASSERT((kg->kg_numupcalls == 0),
13795215b187SJeff Roberson 		        ("%s: ksegrp still has %d upcall datas",
13805215b187SJeff Roberson 			__func__, kg->kg_numupcalls));
13815215b187SJeff Roberson 
13825215b187SJeff Roberson 		if (kg != td->td_ksegrp)
13835c8329edSJulian Elischer 			ksegrp_stash(kg);
13845c8329edSJulian Elischer 	}
13855c8329edSJulian Elischer 	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
13865c8329edSJulian Elischer 	p->p_numksegrps++;
13875c8329edSJulian Elischer 	mtx_unlock_spin(&sched_lock);
13885c8329edSJulian Elischer }
13895c8329edSJulian Elischer 
13905215b187SJeff Roberson /*
13915215b187SJeff Roberson  * This function is intended to be used to initialize a spare thread
13925215b187SJeff Roberson  * for upcall. Initialize thread's large data area outside sched_lock
13935215b187SJeff Roberson  * for thread_schedule_upcall().
13945215b187SJeff Roberson  */
13955215b187SJeff Roberson void
13965215b187SJeff Roberson thread_alloc_spare(struct thread *td, struct thread *spare)
13975215b187SJeff Roberson {
13985215b187SJeff Roberson 	if (td->td_standin)
13995215b187SJeff Roberson 		return;
14005215b187SJeff Roberson 	if (spare == NULL)
14015215b187SJeff Roberson 		spare = thread_alloc();
14025215b187SJeff Roberson 	td->td_standin = spare;
14035215b187SJeff Roberson 	bzero(&spare->td_startzero,
14045215b187SJeff Roberson 	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
14055215b187SJeff Roberson 	spare->td_proc = td->td_proc;
14065215b187SJeff Roberson 	spare->td_ucred = crhold(td->td_ucred);
14075215b187SJeff Roberson }
14085c8329edSJulian Elischer 
140944990b8cSJulian Elischer /*
1410c76e33b6SJonathan Mini  * Create a thread and schedule it for upcall on the KSE given.
141193a7aa79SJulian Elischer  * Use our thread's standin so that we don't have to allocate one.
141244990b8cSJulian Elischer  */
141344990b8cSJulian Elischer struct thread *
14145215b187SJeff Roberson thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
141544990b8cSJulian Elischer {
141644990b8cSJulian Elischer 	struct thread *td2;
141744990b8cSJulian Elischer 
141844990b8cSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
141948bfcdddSJulian Elischer 
142048bfcdddSJulian Elischer 	/*
14215215b187SJeff Roberson 	 * Schedule an upcall thread on specified kse_upcall,
14225215b187SJeff Roberson 	 * the kse_upcall must be free.
14235215b187SJeff Roberson 	 * td must have a spare thread.
142448bfcdddSJulian Elischer 	 */
14255215b187SJeff Roberson 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
142648bfcdddSJulian Elischer 	if ((td2 = td->td_standin) != NULL) {
142748bfcdddSJulian Elischer 		td->td_standin = NULL;
142844990b8cSJulian Elischer 	} else {
14295215b187SJeff Roberson 		panic("no reserve thread when scheduling an upcall");
143048bfcdddSJulian Elischer 		return (NULL);
143144990b8cSJulian Elischer 	}
143244990b8cSJulian Elischer 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
143348bfcdddSJulian Elischer 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
14341faf202eSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
14351faf202eSJulian Elischer 	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
14365215b187SJeff Roberson 	thread_link(td2, ku->ku_ksegrp);
143736f7b36fSDavid Xu 	/* inherit blocked thread's context */
143836f7b36fSDavid Xu 	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));
143936f7b36fSDavid Xu 	cpu_set_upcall(td2, td->td_pcb);
14405215b187SJeff Roberson 	/* Let the new thread become owner of the upcall */
14415215b187SJeff Roberson 	ku->ku_owner   = td2;
14425215b187SJeff Roberson 	td2->td_upcall = ku;
14435215b187SJeff Roberson 	td2->td_flags  = TDF_UPCALLING;
14444093529dSJeff Roberson #if 0	/* XXX This shouldn't be necessary */
144502bbffafSDavid Xu 	if (td->td_proc->p_sflag & PS_NEEDSIGCHK)
144602bbffafSDavid Xu 		td2->td_flags |= TDF_ASTPENDING;
14474093529dSJeff Roberson #endif
14485215b187SJeff Roberson 	td2->td_kse    = NULL;
144948bfcdddSJulian Elischer 	td2->td_state  = TDS_CAN_RUN;
145048bfcdddSJulian Elischer 	td2->td_inhibitors = 0;
145144990b8cSJulian Elischer 	setrunqueue(td2);
145248bfcdddSJulian Elischer 	return (td2);	/* bogus.. should be a void function */
145344990b8cSJulian Elischer }
145444990b8cSJulian Elischer 
145558a3c273SJeff Roberson void
145658a3c273SJeff Roberson thread_signal_add(struct thread *td, int sig)
1457c76e33b6SJonathan Mini {
145858a3c273SJeff Roberson 	struct kse_upcall *ku;
145958a3c273SJeff Roberson 	struct proc *p;
1460c76e33b6SJonathan Mini 	sigset_t ss;
1461c76e33b6SJonathan Mini 	int error;
1462c76e33b6SJonathan Mini 
146358a3c273SJeff Roberson 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
146458a3c273SJeff Roberson 	td = curthread;
146558a3c273SJeff Roberson 	ku = td->td_upcall;
146658a3c273SJeff Roberson 	p = td->td_proc;
146758a3c273SJeff Roberson 
1468c76e33b6SJonathan Mini 	PROC_UNLOCK(p);
146958a3c273SJeff Roberson 	error = copyin(&ku->ku_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
1470c76e33b6SJonathan Mini 	if (error)
147158a3c273SJeff Roberson 		goto error;
147258a3c273SJeff Roberson 
1473c76e33b6SJonathan Mini 	SIGADDSET(ss, sig);
147458a3c273SJeff Roberson 
147558a3c273SJeff Roberson 	error = copyout(&ss, &ku->ku_mailbox->km_sigscaught, sizeof(sigset_t));
1476c76e33b6SJonathan Mini 	if (error)
147758a3c273SJeff Roberson 		goto error;
147858a3c273SJeff Roberson 
147958a3c273SJeff Roberson 	PROC_LOCK(p);
148058a3c273SJeff Roberson 	return;
148158a3c273SJeff Roberson error:
148258a3c273SJeff Roberson 	PROC_LOCK(p);
148358a3c273SJeff Roberson 	sigexit(td, SIGILL);
148458a3c273SJeff Roberson }
148558a3c273SJeff Roberson 
148658a3c273SJeff Roberson 
148758a3c273SJeff Roberson /*
148858a3c273SJeff Roberson  * Schedule an upcall to notify a KSE process recieved signals.
148958a3c273SJeff Roberson  *
149058a3c273SJeff Roberson  */
149158a3c273SJeff Roberson void
149258a3c273SJeff Roberson thread_signal_upcall(struct thread *td)
149358a3c273SJeff Roberson {
1494c76e33b6SJonathan Mini 	mtx_lock_spin(&sched_lock);
149558a3c273SJeff Roberson 	td->td_flags |= TDF_UPCALLING;
1496c76e33b6SJonathan Mini 	mtx_unlock_spin(&sched_lock);
149758a3c273SJeff Roberson 
149858a3c273SJeff Roberson 	return;
1499c76e33b6SJonathan Mini }
1500c76e33b6SJonathan Mini 
15016ce75196SDavid Xu void
15026ce75196SDavid Xu thread_switchout(struct thread *td)
15036ce75196SDavid Xu {
15046ce75196SDavid Xu 	struct kse_upcall *ku;
15056ce75196SDavid Xu 
15066ce75196SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
15076ce75196SDavid Xu 
15086ce75196SDavid Xu 	/*
15096ce75196SDavid Xu 	 * If the outgoing thread is in threaded group and has never
15106ce75196SDavid Xu 	 * scheduled an upcall, decide whether this is a short
15116ce75196SDavid Xu 	 * or long term event and thus whether or not to schedule
15126ce75196SDavid Xu 	 * an upcall.
15136ce75196SDavid Xu 	 * If it is a short term event, just suspend it in
15146ce75196SDavid Xu 	 * a way that takes its KSE with it.
15156ce75196SDavid Xu 	 * Select the events for which we want to schedule upcalls.
15166ce75196SDavid Xu 	 * For now it's just sleep.
15176ce75196SDavid Xu 	 * XXXKSE eventually almost any inhibition could do.
15186ce75196SDavid Xu 	 */
15196ce75196SDavid Xu 	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
15206ce75196SDavid Xu 		/*
15216ce75196SDavid Xu 		 * Release ownership of upcall, and schedule an upcall
15226ce75196SDavid Xu 		 * thread, this new upcall thread becomes the owner of
15236ce75196SDavid Xu 		 * the upcall structure.
15246ce75196SDavid Xu 		 */
15256ce75196SDavid Xu 		ku = td->td_upcall;
15266ce75196SDavid Xu 		ku->ku_owner = NULL;
15276ce75196SDavid Xu 		td->td_upcall = NULL;
15286ce75196SDavid Xu 		td->td_flags &= ~TDF_CAN_UNBIND;
15296ce75196SDavid Xu 		thread_schedule_upcall(td, ku);
15306ce75196SDavid Xu 	}
15316ce75196SDavid Xu }
15326ce75196SDavid Xu 
1533c76e33b6SJonathan Mini /*
15345215b187SJeff Roberson  * Setup done on the thread when it enters the kernel.
15351434d3feSJulian Elischer  * XXXKSE Presently only for syscalls but eventually all kernel entries.
15361434d3feSJulian Elischer  */
15371434d3feSJulian Elischer void
15381434d3feSJulian Elischer thread_user_enter(struct proc *p, struct thread *td)
15391434d3feSJulian Elischer {
15405215b187SJeff Roberson 	struct ksegrp *kg;
15415215b187SJeff Roberson 	struct kse_upcall *ku;
15421434d3feSJulian Elischer 
15435215b187SJeff Roberson 	kg = td->td_ksegrp;
15441434d3feSJulian Elischer 	/*
15451434d3feSJulian Elischer 	 * First check that we shouldn't just abort.
15461434d3feSJulian Elischer 	 * But check if we are the single thread first!
15471434d3feSJulian Elischer 	 * XXX p_singlethread not locked, but should be safe.
15481434d3feSJulian Elischer 	 */
15495215b187SJeff Roberson 	if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
15501434d3feSJulian Elischer 		PROC_LOCK(p);
15511434d3feSJulian Elischer 		mtx_lock_spin(&sched_lock);
1552e574e444SDavid Xu 		thread_stopped(p);
15531434d3feSJulian Elischer 		thread_exit();
15541434d3feSJulian Elischer 		/* NOTREACHED */
15551434d3feSJulian Elischer 	}
15561434d3feSJulian Elischer 
15571434d3feSJulian Elischer 	/*
15581434d3feSJulian Elischer 	 * If we are doing a syscall in a KSE environment,
15591434d3feSJulian Elischer 	 * note where our mailbox is. There is always the
156093a7aa79SJulian Elischer 	 * possibility that we could do this lazily (in kse_reassign()),
15611434d3feSJulian Elischer 	 * but for now do it every time.
15621434d3feSJulian Elischer 	 */
15635215b187SJeff Roberson 	kg = td->td_ksegrp;
15645215b187SJeff Roberson 	if (kg->kg_numupcalls) {
15655215b187SJeff Roberson 		ku = td->td_upcall;
15665215b187SJeff Roberson 		KASSERT(ku, ("%s: no upcall owned", __func__));
15675215b187SJeff Roberson 		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
15681434d3feSJulian Elischer 		td->td_mailbox =
15695215b187SJeff Roberson 		    (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
15701434d3feSJulian Elischer 		if ((td->td_mailbox == NULL) ||
15718798d4f9SDavid Xu 		    (td->td_mailbox == (void *)-1)) {
15725215b187SJeff Roberson 		    	/* Don't schedule upcall when blocked */
15735215b187SJeff Roberson 			td->td_mailbox = NULL;
15748798d4f9SDavid Xu 			mtx_lock_spin(&sched_lock);
15755215b187SJeff Roberson 			td->td_flags &= ~TDF_CAN_UNBIND;
15768798d4f9SDavid Xu 			mtx_unlock_spin(&sched_lock);
15778798d4f9SDavid Xu 		} else {
15781434d3feSJulian Elischer 			if (td->td_standin == NULL)
15795215b187SJeff Roberson 				thread_alloc_spare(td, NULL);
15808798d4f9SDavid Xu 			mtx_lock_spin(&sched_lock);
158193a7aa79SJulian Elischer 			td->td_flags |= TDF_CAN_UNBIND;
15828798d4f9SDavid Xu 			mtx_unlock_spin(&sched_lock);
15835215b187SJeff Roberson 		}
15841434d3feSJulian Elischer 	}
15851434d3feSJulian Elischer }
15861434d3feSJulian Elischer 
15871434d3feSJulian Elischer /*
1588c76e33b6SJonathan Mini  * The extra work we go through if we are a threaded process when we
1589c76e33b6SJonathan Mini  * return to userland.
1590c76e33b6SJonathan Mini  *
1591c76e33b6SJonathan Mini  * If we are a KSE process and returning to user mode, check for
1592c76e33b6SJonathan Mini  * extra work to do before we return (e.g. for more syscalls
1593c76e33b6SJonathan Mini  * to complete first).  If we were in a critical section, we should
1594c76e33b6SJonathan Mini  * just return to let it finish. Same if we were in the UTS (in
1595c76e33b6SJonathan Mini  * which case the mailbox's context's busy indicator will be set).
1596c76e33b6SJonathan Mini  * The only traps we suport will have set the mailbox.
1597c76e33b6SJonathan Mini  * We will clear it here.
159844990b8cSJulian Elischer  */
1599c76e33b6SJonathan Mini int
1600253fdd5bSJulian Elischer thread_userret(struct thread *td, struct trapframe *frame)
1601c76e33b6SJonathan Mini {
16020252d203SDavid Xu 	int error = 0, upcalls;
16035215b187SJeff Roberson 	struct kse_upcall *ku;
16040252d203SDavid Xu 	struct ksegrp *kg, *kg2;
160548bfcdddSJulian Elischer 	struct proc *p;
1606bfd83250SDavid Xu 	struct timespec ts;
1607c76e33b6SJonathan Mini 
16086f8132a8SJulian Elischer 	p = td->td_proc;
16095215b187SJeff Roberson 	kg = td->td_ksegrp;
161093a7aa79SJulian Elischer 
16114093529dSJeff Roberson 
16125215b187SJeff Roberson 	/* Nothing to do with non-threaded group/process */
16135215b187SJeff Roberson 	if (td->td_ksegrp->kg_numupcalls == 0)
16145215b187SJeff Roberson 		return (0);
16155215b187SJeff Roberson 
16165215b187SJeff Roberson 	/*
16175215b187SJeff Roberson 	 * Stat clock interrupt hit in userland, it
16185215b187SJeff Roberson 	 * is returning from interrupt, charge thread's
16195215b187SJeff Roberson 	 * userland time for UTS.
16205215b187SJeff Roberson 	 */
16215215b187SJeff Roberson 	if (td->td_flags & TDF_USTATCLOCK) {
16224b4866edSDavid Xu 		thread_update_usr_ticks(td, 1);
162393a7aa79SJulian Elischer 		mtx_lock_spin(&sched_lock);
16245215b187SJeff Roberson 		td->td_flags &= ~TDF_USTATCLOCK;
16250dbb100bSDavid Xu 		mtx_unlock_spin(&sched_lock);
16264b4866edSDavid Xu 		if (kg->kg_completed ||
16274b4866edSDavid Xu 		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
16284b4866edSDavid Xu 			thread_user_enter(p, td);
16295215b187SJeff Roberson 	}
16305215b187SJeff Roberson 
16315215b187SJeff Roberson 	/*
16325215b187SJeff Roberson 	 * Optimisation:
16335215b187SJeff Roberson 	 * This thread has not started any upcall.
16345215b187SJeff Roberson 	 * If there is no work to report other than ourself,
16355215b187SJeff Roberson 	 * then it can return direct to userland.
16365215b187SJeff Roberson 	 */
16375215b187SJeff Roberson 	if (TD_CAN_UNBIND(td)) {
16385215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
16395215b187SJeff Roberson 		td->td_flags &= ~TDF_CAN_UNBIND;
16409a4b78c9SDavid Xu 		ku = td->td_upcall;
16414093529dSJeff Roberson 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
164221e0492aSDavid Xu 		    (kg->kg_completed == NULL) &&
16436ce75196SDavid Xu 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
16446ce75196SDavid Xu 		    (kg->kg_upquantum && ticks >= kg->kg_nextupcall)) {
16454093529dSJeff Roberson 			mtx_unlock_spin(&sched_lock);
16464b4866edSDavid Xu 			thread_update_usr_ticks(td, 0);
16479a4b78c9SDavid Xu 			nanotime(&ts);
16489a4b78c9SDavid Xu 			error = copyout(&ts,
16499a4b78c9SDavid Xu 				(caddr_t)&ku->ku_mailbox->km_timeofday,
16509a4b78c9SDavid Xu 				sizeof(ts));
165121e0492aSDavid Xu 			td->td_mailbox = 0;
16529a4b78c9SDavid Xu 			if (error)
16539a4b78c9SDavid Xu 				goto out;
165493a7aa79SJulian Elischer 			return (0);
165593a7aa79SJulian Elischer 		}
16564093529dSJeff Roberson 		mtx_unlock_spin(&sched_lock);
165793a7aa79SJulian Elischer 		error = thread_export_context(td);
165848bfcdddSJulian Elischer 		if (error) {
165948bfcdddSJulian Elischer 			/*
16605215b187SJeff Roberson 			 * Failing to do the KSE operation just defaults
166148bfcdddSJulian Elischer 			 * back to synchonous operation, so just return from
166293a7aa79SJulian Elischer 			 * the syscall.
166393a7aa79SJulian Elischer 			 */
16645215b187SJeff Roberson 			return (0);
166593a7aa79SJulian Elischer 		}
166693a7aa79SJulian Elischer 		/*
16675215b187SJeff Roberson 		 * There is something to report, and we own an upcall
16685215b187SJeff Roberson 		 * strucuture, we can go to userland.
16695215b187SJeff Roberson 		 * Turn ourself into an upcall thread.
167093a7aa79SJulian Elischer 		 */
16715215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
167293a7aa79SJulian Elischer 		td->td_flags |= TDF_UPCALLING;
167393a7aa79SJulian Elischer 		mtx_unlock_spin(&sched_lock);
16745215b187SJeff Roberson 	} else if (td->td_mailbox) {
167593a7aa79SJulian Elischer 		error = thread_export_context(td);
167693a7aa79SJulian Elischer 		/* possibly upcall with error? */
1677e574e444SDavid Xu 		PROC_LOCK(p);
16786f8132a8SJulian Elischer 		/*
16795215b187SJeff Roberson 		 * There are upcall threads waiting for
16805215b187SJeff Roberson 		 * work to do, wake one of them up.
16815215b187SJeff Roberson 		 * XXXKSE Maybe wake all of them up.
16826f8132a8SJulian Elischer 		 */
1683e574e444SDavid Xu 		if (!error && kg->kg_upsleeps)
16845215b187SJeff Roberson 			wakeup_one(&kg->kg_completed);
1685e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1686e574e444SDavid Xu 		thread_stopped(p);
168793a7aa79SJulian Elischer 		thread_exit();
16885215b187SJeff Roberson 		/* NOTREACHED */
168948bfcdddSJulian Elischer 	}
169093a7aa79SJulian Elischer 
1691a87891eeSDavid Xu 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1692a87891eeSDavid Xu 
1693a87891eeSDavid Xu 	if (p->p_numthreads > max_threads_per_proc) {
1694a87891eeSDavid Xu 		max_threads_hits++;
1695a87891eeSDavid Xu 		PROC_LOCK(p);
1696a87891eeSDavid Xu 		while (p->p_numthreads > max_threads_per_proc) {
1697a87891eeSDavid Xu 			if (P_SHOULDSTOP(p))
1698a87891eeSDavid Xu 				break;
1699a87891eeSDavid Xu 			upcalls = 0;
1700a87891eeSDavid Xu 			mtx_lock_spin(&sched_lock);
1701a87891eeSDavid Xu 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1702a87891eeSDavid Xu 				if (kg2->kg_numupcalls == 0)
1703a87891eeSDavid Xu 					upcalls++;
1704a87891eeSDavid Xu 				else
1705a87891eeSDavid Xu 					upcalls += kg2->kg_numupcalls;
1706a87891eeSDavid Xu 			}
1707a87891eeSDavid Xu 			mtx_unlock_spin(&sched_lock);
1708a87891eeSDavid Xu 			if (upcalls >= max_threads_per_proc)
1709a87891eeSDavid Xu 				break;
1710a87891eeSDavid Xu 			p->p_maxthrwaits++;
1711a87891eeSDavid Xu 			msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
1712a87891eeSDavid Xu 			    "maxthreads", NULL);
1713a87891eeSDavid Xu 			p->p_maxthrwaits--;
1714a87891eeSDavid Xu 		}
1715a87891eeSDavid Xu 		PROC_UNLOCK(p);
1716a87891eeSDavid Xu 	}
1717a87891eeSDavid Xu 
171893a7aa79SJulian Elischer 	if (td->td_flags & TDF_UPCALLING) {
17196ce75196SDavid Xu 		kg->kg_nextupcall = ticks+kg->kg_upquantum;
17205215b187SJeff Roberson 		ku = td->td_upcall;
172148bfcdddSJulian Elischer 		/*
172244990b8cSJulian Elischer 		 * There is no more work to do and we are going to ride
17235215b187SJeff Roberson 		 * this thread up to userland as an upcall.
172448bfcdddSJulian Elischer 		 * Do the last parts of the setup needed for the upcall.
172544990b8cSJulian Elischer 		 */
1726c76e33b6SJonathan Mini 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1727ed32df81SJulian Elischer 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1728c76e33b6SJonathan Mini 
1729c76e33b6SJonathan Mini 		/*
1730c76e33b6SJonathan Mini 		 * Set user context to the UTS.
1731696058c3SJulian Elischer 		 * Will use Giant in cpu_thread_clean() because it uses
1732696058c3SJulian Elischer 		 * kmem_free(kernel_map, ...)
1733c76e33b6SJonathan Mini 		 */
17345215b187SJeff Roberson 		cpu_set_upcall_kse(td, ku);
17355215b187SJeff Roberson 		mtx_lock_spin(&sched_lock);
17365215b187SJeff Roberson 		td->td_flags &= ~TDF_UPCALLING;
17375215b187SJeff Roberson 		if (ku->ku_flags & KUF_DOUPCALL)
17385215b187SJeff Roberson 			ku->ku_flags &= ~KUF_DOUPCALL;
17395215b187SJeff Roberson 		mtx_unlock_spin(&sched_lock);
17403d0586d4SJulian Elischer 
1741c76e33b6SJonathan Mini 		/*
174293a7aa79SJulian Elischer 		 * Unhook the list of completed threads.
174393a7aa79SJulian Elischer 		 * anything that completes after this gets to
174493a7aa79SJulian Elischer 		 * come in next time.
174593a7aa79SJulian Elischer 		 * Put the list of completed thread mailboxes on
174693a7aa79SJulian Elischer 		 * this KSE's mailbox.
1747c76e33b6SJonathan Mini 		 */
17485215b187SJeff Roberson 		error = thread_link_mboxes(kg, ku);
17493d0586d4SJulian Elischer 		if (error)
17500252d203SDavid Xu 			goto out;
1751c76e33b6SJonathan Mini 
1752c76e33b6SJonathan Mini 		/*
175393a7aa79SJulian Elischer 		 * Set state and clear the  thread mailbox pointer.
175448bfcdddSJulian Elischer 		 * From now on we are just a bound outgoing process.
175548bfcdddSJulian Elischer 		 * **Problem** userret is often called several times.
175693a7aa79SJulian Elischer 		 * it would be nice if this all happenned only on the first
175793a7aa79SJulian Elischer 		 * time through. (the scan for extra work etc.)
1758c76e33b6SJonathan Mini 		 */
17595215b187SJeff Roberson 		error = suword((caddr_t)&ku->ku_mailbox->km_curthread, 0);
176093a7aa79SJulian Elischer 		if (error)
17610252d203SDavid Xu 			goto out;
17625215b187SJeff Roberson 
17635215b187SJeff Roberson 		/* Export current system time */
1764bfd83250SDavid Xu 		nanotime(&ts);
17650252d203SDavid Xu 		error = copyout(&ts, (caddr_t)&ku->ku_mailbox->km_timeofday,
17660252d203SDavid Xu 			sizeof(ts));
1767bfd83250SDavid Xu 	}
17680252d203SDavid Xu 
17690252d203SDavid Xu out:
17700252d203SDavid Xu 	if (error) {
17713d0586d4SJulian Elischer 		/*
1772fc8cdd87SDavid Xu 		 * Things are going to be so screwed we should just kill
1773fc8cdd87SDavid Xu 		 * the process.
17743d0586d4SJulian Elischer 		 * how do we do that?
17753d0586d4SJulian Elischer 		 */
177648bfcdddSJulian Elischer 		PROC_LOCK(td->td_proc);
177748bfcdddSJulian Elischer 		psignal(td->td_proc, SIGSEGV);
177848bfcdddSJulian Elischer 		PROC_UNLOCK(td->td_proc);
17790252d203SDavid Xu 	} else {
17800252d203SDavid Xu 		/*
17810252d203SDavid Xu 		 * Optimisation:
17820252d203SDavid Xu 		 * Ensure that we have a spare thread available,
17830252d203SDavid Xu 		 * for when we re-enter the kernel.
17840252d203SDavid Xu 		 */
17850252d203SDavid Xu 		if (td->td_standin == NULL)
17860252d203SDavid Xu 			thread_alloc_spare(td, NULL);
17870252d203SDavid Xu 	}
17880252d203SDavid Xu 
17890252d203SDavid Xu 	/*
17900252d203SDavid Xu 	 * Clear thread mailbox first, then clear system tick count.
17910252d203SDavid Xu 	 * The order is important because thread_statclock() use
17920252d203SDavid Xu 	 * mailbox pointer to see if it is an userland thread or
17930252d203SDavid Xu 	 * an UTS kernel thread.
17940252d203SDavid Xu 	 */
179593a7aa79SJulian Elischer 	td->td_mailbox = NULL;
17965215b187SJeff Roberson 	td->td_usticks = 0;
179748bfcdddSJulian Elischer 	return (error);	/* go sync */
179844990b8cSJulian Elischer }
179944990b8cSJulian Elischer 
180044990b8cSJulian Elischer /*
180144990b8cSJulian Elischer  * Enforce single-threading.
180244990b8cSJulian Elischer  *
180344990b8cSJulian Elischer  * Returns 1 if the caller must abort (another thread is waiting to
180444990b8cSJulian Elischer  * exit the process or similar). Process is locked!
180544990b8cSJulian Elischer  * Returns 0 when you are successfully the only thread running.
180644990b8cSJulian Elischer  * A process has successfully single threaded in the suspend mode when
180744990b8cSJulian Elischer  * There are no threads in user mode. Threads in the kernel must be
180844990b8cSJulian Elischer  * allowed to continue until they get to the user boundary. They may even
180944990b8cSJulian Elischer  * copy out their return values and data before suspending. They may however be
181044990b8cSJulian Elischer  * accellerated in reaching the user boundary as we will wake up
181144990b8cSJulian Elischer  * any sleeping threads that are interruptable. (PCATCH).
181244990b8cSJulian Elischer  */
181344990b8cSJulian Elischer int
181444990b8cSJulian Elischer thread_single(int force_exit)
181544990b8cSJulian Elischer {
181644990b8cSJulian Elischer 	struct thread *td;
181744990b8cSJulian Elischer 	struct thread *td2;
181844990b8cSJulian Elischer 	struct proc *p;
181944990b8cSJulian Elischer 
182044990b8cSJulian Elischer 	td = curthread;
182144990b8cSJulian Elischer 	p = td->td_proc;
1822696058c3SJulian Elischer 	mtx_assert(&Giant, MA_OWNED);
182344990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
182444990b8cSJulian Elischer 	KASSERT((td != NULL), ("curthread is NULL"));
182544990b8cSJulian Elischer 
18262c10d16aSJeff Roberson 	if ((p->p_flag & P_THREADED) == 0 && p->p_numthreads == 1)
182744990b8cSJulian Elischer 		return (0);
182844990b8cSJulian Elischer 
1829e3b9bf71SJulian Elischer 	/* Is someone already single threading? */
1830e3b9bf71SJulian Elischer 	if (p->p_singlethread)
183144990b8cSJulian Elischer 		return (1);
183244990b8cSJulian Elischer 
183393a7aa79SJulian Elischer 	if (force_exit == SINGLE_EXIT) {
183444990b8cSJulian Elischer 		p->p_flag |= P_SINGLE_EXIT;
183593a7aa79SJulian Elischer 	} else
183644990b8cSJulian Elischer 		p->p_flag &= ~P_SINGLE_EXIT;
18371279572aSDavid Xu 	p->p_flag |= P_STOPPED_SINGLE;
183844990b8cSJulian Elischer 	p->p_singlethread = td;
18399d102777SJulian Elischer 	/* XXXKSE Which lock protects the below values? */
184044990b8cSJulian Elischer 	while ((p->p_numthreads - p->p_suspcount) != 1) {
184171fad9fdSJulian Elischer 		mtx_lock_spin(&sched_lock);
184244990b8cSJulian Elischer 		FOREACH_THREAD_IN_PROC(p, td2) {
184344990b8cSJulian Elischer 			if (td2 == td)
184444990b8cSJulian Elischer 				continue;
18450252d203SDavid Xu 			td->td_flags |= TDF_ASTPENDING;
184671fad9fdSJulian Elischer 			if (TD_IS_INHIBITED(td2)) {
18471279572aSDavid Xu 				if (force_exit == SINGLE_EXIT) {
18489d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2)) {
184971fad9fdSJulian Elischer 						thread_unsuspend_one(td2);
185071fad9fdSJulian Elischer 					}
185133862f40SDavid Xu 					if (TD_ON_SLEEPQ(td2) &&
185233862f40SDavid Xu 					    (td2->td_flags & TDF_SINTR)) {
1853e3b9bf71SJulian Elischer 						if (td2->td_flags & TDF_CVWAITQ)
185433862f40SDavid Xu 							cv_abort(td2);
1855e3b9bf71SJulian Elischer 						else
185633862f40SDavid Xu 							abortsleep(td2);
185771fad9fdSJulian Elischer 					}
18589d102777SJulian Elischer 				} else {
18599d102777SJulian Elischer 					if (TD_IS_SUSPENDED(td2))
18609d102777SJulian Elischer 						continue;
18615215b187SJeff Roberson 					/*
18625215b187SJeff Roberson 					 * maybe other inhibitted states too?
18635215b187SJeff Roberson 					 * XXXKSE Is it totally safe to
18645215b187SJeff Roberson 					 * suspend a non-interruptable thread?
18655215b187SJeff Roberson 					 */
186693a7aa79SJulian Elischer 					if (td2->td_inhibitors &
18675215b187SJeff Roberson 					    (TDI_SLEEPING | TDI_SWAPPED))
18689d102777SJulian Elischer 						thread_suspend_one(td2);
186944990b8cSJulian Elischer 				}
187044990b8cSJulian Elischer 			}
18719d102777SJulian Elischer 		}
18729d102777SJulian Elischer 		/*
18739d102777SJulian Elischer 		 * Maybe we suspended some threads.. was it enough?
18749d102777SJulian Elischer 		 */
18759d102777SJulian Elischer 		if ((p->p_numthreads - p->p_suspcount) == 1) {
18769d102777SJulian Elischer 			mtx_unlock_spin(&sched_lock);
18779d102777SJulian Elischer 			break;
18789d102777SJulian Elischer 		}
18799d102777SJulian Elischer 
188044990b8cSJulian Elischer 		/*
188144990b8cSJulian Elischer 		 * Wake us up when everyone else has suspended.
1882e3b9bf71SJulian Elischer 		 * In the mean time we suspend as well.
188344990b8cSJulian Elischer 		 */
188471fad9fdSJulian Elischer 		thread_suspend_one(td);
18852c10d16aSJeff Roberson 		/* XXX If you recursed this is broken. */
188644990b8cSJulian Elischer 		mtx_unlock(&Giant);
188744990b8cSJulian Elischer 		PROC_UNLOCK(p);
1888696058c3SJulian Elischer 		p->p_stats->p_ru.ru_nvcsw++;
188944990b8cSJulian Elischer 		mi_switch();
189044990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
189144990b8cSJulian Elischer 		mtx_lock(&Giant);
189244990b8cSJulian Elischer 		PROC_LOCK(p);
189344990b8cSJulian Elischer 	}
18945215b187SJeff Roberson 	if (force_exit == SINGLE_EXIT) {
18955215b187SJeff Roberson 		if (td->td_upcall) {
18965215b187SJeff Roberson 			mtx_lock_spin(&sched_lock);
18975215b187SJeff Roberson 			upcall_remove(td);
18985215b187SJeff Roberson 			mtx_unlock_spin(&sched_lock);
18995215b187SJeff Roberson 		}
19005c8329edSJulian Elischer 		kse_purge(p, td);
19015215b187SJeff Roberson 	}
190244990b8cSJulian Elischer 	return (0);
190344990b8cSJulian Elischer }
190444990b8cSJulian Elischer 
190544990b8cSJulian Elischer /*
190644990b8cSJulian Elischer  * Called in from locations that can safely check to see
190744990b8cSJulian Elischer  * whether we have to suspend or at least throttle for a
190844990b8cSJulian Elischer  * single-thread event (e.g. fork).
190944990b8cSJulian Elischer  *
191044990b8cSJulian Elischer  * Such locations include userret().
191144990b8cSJulian Elischer  * If the "return_instead" argument is non zero, the thread must be able to
191244990b8cSJulian Elischer  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
191344990b8cSJulian Elischer  *
191444990b8cSJulian Elischer  * The 'return_instead' argument tells the function if it may do a
191544990b8cSJulian Elischer  * thread_exit() or suspend, or whether the caller must abort and back
191644990b8cSJulian Elischer  * out instead.
191744990b8cSJulian Elischer  *
191844990b8cSJulian Elischer  * If the thread that set the single_threading request has set the
191944990b8cSJulian Elischer  * P_SINGLE_EXIT bit in the process flags then this call will never return
192044990b8cSJulian Elischer  * if 'return_instead' is false, but will exit.
192144990b8cSJulian Elischer  *
192244990b8cSJulian Elischer  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
192344990b8cSJulian Elischer  *---------------+--------------------+---------------------
192444990b8cSJulian Elischer  *       0       | returns 0          |   returns 0 or 1
192544990b8cSJulian Elischer  *               | when ST ends       |   immediatly
192644990b8cSJulian Elischer  *---------------+--------------------+---------------------
192744990b8cSJulian Elischer  *       1       | thread exits       |   returns 1
192844990b8cSJulian Elischer  *               |                    |  immediatly
192944990b8cSJulian Elischer  * 0 = thread_exit() or suspension ok,
193044990b8cSJulian Elischer  * other = return error instead of stopping the thread.
193144990b8cSJulian Elischer  *
193244990b8cSJulian Elischer  * While a full suspension is under effect, even a single threading
193344990b8cSJulian Elischer  * thread would be suspended if it made this call (but it shouldn't).
193444990b8cSJulian Elischer  * This call should only be made from places where
193544990b8cSJulian Elischer  * thread_exit() would be safe as that may be the outcome unless
193644990b8cSJulian Elischer  * return_instead is set.
193744990b8cSJulian Elischer  */
193844990b8cSJulian Elischer int
193944990b8cSJulian Elischer thread_suspend_check(int return_instead)
194044990b8cSJulian Elischer {
1941ecafb24bSJuli Mallett 	struct thread *td;
1942ecafb24bSJuli Mallett 	struct proc *p;
19435c8329edSJulian Elischer 	struct ksegrp *kg;
194444990b8cSJulian Elischer 
194544990b8cSJulian Elischer 	td = curthread;
194644990b8cSJulian Elischer 	p = td->td_proc;
19475c8329edSJulian Elischer 	kg = td->td_ksegrp;
194844990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
194944990b8cSJulian Elischer 	while (P_SHOULDSTOP(p)) {
19501279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
195144990b8cSJulian Elischer 			KASSERT(p->p_singlethread != NULL,
195244990b8cSJulian Elischer 			    ("singlethread not set"));
195344990b8cSJulian Elischer 			/*
1954e3b9bf71SJulian Elischer 			 * The only suspension in action is a
1955e3b9bf71SJulian Elischer 			 * single-threading. Single threader need not stop.
1956b6d5995eSJulian Elischer 			 * XXX Should be safe to access unlocked
1957b6d5995eSJulian Elischer 			 * as it can only be set to be true by us.
195844990b8cSJulian Elischer 			 */
1959e3b9bf71SJulian Elischer 			if (p->p_singlethread == td)
196044990b8cSJulian Elischer 				return (0);	/* Exempt from stopping. */
196144990b8cSJulian Elischer 		}
1962e3b9bf71SJulian Elischer 		if (return_instead)
196344990b8cSJulian Elischer 			return (1);
196444990b8cSJulian Elischer 
1965e574e444SDavid Xu 		mtx_lock_spin(&sched_lock);
1966e574e444SDavid Xu 		thread_stopped(p);
196744990b8cSJulian Elischer 		/*
196844990b8cSJulian Elischer 		 * If the process is waiting for us to exit,
196944990b8cSJulian Elischer 		 * this thread should just suicide.
19701279572aSDavid Xu 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
197144990b8cSJulian Elischer 		 */
197244990b8cSJulian Elischer 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
197344990b8cSJulian Elischer 			while (mtx_owned(&Giant))
197444990b8cSJulian Elischer 				mtx_unlock(&Giant);
19752c10d16aSJeff Roberson 			if (p->p_flag & P_THREADED)
197644990b8cSJulian Elischer 				thread_exit();
19772c10d16aSJeff Roberson 			else
19782c10d16aSJeff Roberson 				thr_exit1();
197944990b8cSJulian Elischer 		}
198044990b8cSJulian Elischer 
19812c10d16aSJeff Roberson 		mtx_assert(&Giant, MA_NOTOWNED);
198244990b8cSJulian Elischer 		/*
198344990b8cSJulian Elischer 		 * When a thread suspends, it just
198444990b8cSJulian Elischer 		 * moves to the processes's suspend queue
198544990b8cSJulian Elischer 		 * and stays there.
198644990b8cSJulian Elischer 		 */
198771fad9fdSJulian Elischer 		thread_suspend_one(td);
198844990b8cSJulian Elischer 		PROC_UNLOCK(p);
19891279572aSDavid Xu 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1990cf19bf91SJulian Elischer 			if (p->p_numthreads == p->p_suspcount) {
199171fad9fdSJulian Elischer 				thread_unsuspend_one(p->p_singlethread);
1992cf19bf91SJulian Elischer 			}
1993cf19bf91SJulian Elischer 		}
199420568366SJulian Elischer 		p->p_stats->p_ru.ru_nivcsw++;
199544990b8cSJulian Elischer 		mi_switch();
199644990b8cSJulian Elischer 		mtx_unlock_spin(&sched_lock);
199744990b8cSJulian Elischer 		PROC_LOCK(p);
199844990b8cSJulian Elischer 	}
199944990b8cSJulian Elischer 	return (0);
200044990b8cSJulian Elischer }
200144990b8cSJulian Elischer 
200235c32a76SDavid Xu void
200335c32a76SDavid Xu thread_suspend_one(struct thread *td)
200435c32a76SDavid Xu {
200535c32a76SDavid Xu 	struct proc *p = td->td_proc;
200635c32a76SDavid Xu 
200735c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
2008e574e444SDavid Xu 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
200935c32a76SDavid Xu 	p->p_suspcount++;
201071fad9fdSJulian Elischer 	TD_SET_SUSPENDED(td);
201135c32a76SDavid Xu 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
201271fad9fdSJulian Elischer 	/*
201371fad9fdSJulian Elischer 	 * Hack: If we are suspending but are on the sleep queue
201471fad9fdSJulian Elischer 	 * then we are in msleep or the cv equivalent. We
201571fad9fdSJulian Elischer 	 * want to look like we have two Inhibitors.
20169d102777SJulian Elischer 	 * May already be set.. doesn't matter.
201771fad9fdSJulian Elischer 	 */
201871fad9fdSJulian Elischer 	if (TD_ON_SLEEPQ(td))
201971fad9fdSJulian Elischer 		TD_SET_SLEEPING(td);
202035c32a76SDavid Xu }
202135c32a76SDavid Xu 
202235c32a76SDavid Xu void
202335c32a76SDavid Xu thread_unsuspend_one(struct thread *td)
202435c32a76SDavid Xu {
202535c32a76SDavid Xu 	struct proc *p = td->td_proc;
202635c32a76SDavid Xu 
202735c32a76SDavid Xu 	mtx_assert(&sched_lock, MA_OWNED);
202835c32a76SDavid Xu 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
202971fad9fdSJulian Elischer 	TD_CLR_SUSPENDED(td);
203035c32a76SDavid Xu 	p->p_suspcount--;
203171fad9fdSJulian Elischer 	setrunnable(td);
203235c32a76SDavid Xu }
203335c32a76SDavid Xu 
203444990b8cSJulian Elischer /*
203544990b8cSJulian Elischer  * Allow all threads blocked by single threading to continue running.
203644990b8cSJulian Elischer  */
203744990b8cSJulian Elischer void
203844990b8cSJulian Elischer thread_unsuspend(struct proc *p)
203944990b8cSJulian Elischer {
204044990b8cSJulian Elischer 	struct thread *td;
204144990b8cSJulian Elischer 
2042b6d5995eSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
204344990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
204444990b8cSJulian Elischer 	if (!P_SHOULDSTOP(p)) {
204544990b8cSJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
204635c32a76SDavid Xu 			thread_unsuspend_one(td);
204744990b8cSJulian Elischer 		}
20481279572aSDavid Xu 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
204944990b8cSJulian Elischer 	    (p->p_numthreads == p->p_suspcount)) {
205044990b8cSJulian Elischer 		/*
205144990b8cSJulian Elischer 		 * Stopping everything also did the job for the single
205244990b8cSJulian Elischer 		 * threading request. Now we've downgraded to single-threaded,
205344990b8cSJulian Elischer 		 * let it continue.
205444990b8cSJulian Elischer 		 */
205535c32a76SDavid Xu 		thread_unsuspend_one(p->p_singlethread);
205644990b8cSJulian Elischer 	}
205744990b8cSJulian Elischer }
205844990b8cSJulian Elischer 
205944990b8cSJulian Elischer void
206044990b8cSJulian Elischer thread_single_end(void)
206144990b8cSJulian Elischer {
206244990b8cSJulian Elischer 	struct thread *td;
206344990b8cSJulian Elischer 	struct proc *p;
206444990b8cSJulian Elischer 
206544990b8cSJulian Elischer 	td = curthread;
206644990b8cSJulian Elischer 	p = td->td_proc;
206744990b8cSJulian Elischer 	PROC_LOCK_ASSERT(p, MA_OWNED);
20681279572aSDavid Xu 	p->p_flag &= ~P_STOPPED_SINGLE;
206944990b8cSJulian Elischer 	p->p_singlethread = NULL;
207049539972SJulian Elischer 	/*
207149539972SJulian Elischer 	 * If there are other threads they mey now run,
207249539972SJulian Elischer 	 * unless of course there is a blanket 'stop order'
207349539972SJulian Elischer 	 * on the process. The single threader must be allowed
207449539972SJulian Elischer 	 * to continue however as this is a bad place to stop.
207549539972SJulian Elischer 	 */
207649539972SJulian Elischer 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
207749539972SJulian Elischer 		mtx_lock_spin(&sched_lock);
207849539972SJulian Elischer 		while (( td = TAILQ_FIRST(&p->p_suspended))) {
207971fad9fdSJulian Elischer 			thread_unsuspend_one(td);
208044990b8cSJulian Elischer 		}
208149539972SJulian Elischer 		mtx_unlock_spin(&sched_lock);
208249539972SJulian Elischer 	}
208349539972SJulian Elischer }
208449539972SJulian Elischer 
208544990b8cSJulian Elischer 
2086