/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/turnstile.h>
#include <sys/user.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;
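/*
 * RANGEOF(), below, gives the size in bytes of the structure members
 * from 'start' up to (but not including) 'end'.  Marker members such
 * as kg_startzero/kg_endzero let kse_create() clear or copy a whole
 * run of fields at once, e.g. (from this file):
 *
 *	bzero(&newkg->kg_startzero,
 *	    RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 */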
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts. The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define TID_IDS_PER_PART	1024
#define TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define TID_MIN			(PID_MAX + 1)
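/*
 * Worked example (illustration only): with a 64-bit tid_bitmap_word,
 * TID_IDS_PER_IDX is 64 and TID_BITMAP_SIZE is 16, so each part below
 * tracks 1024 IDs.  A TID t within a part maps to word
 * (t - bmp_base) / TID_IDS_PER_IDX, bit (t - bmp_base) % TID_IDS_PER_IDX
 * of that word; a set bit means "free", so ffsl() can locate an
 * available ID directly.
 */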
struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	int		bmp_base;
	int		bmp_free;
};

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_tid = 0;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	tid_bitmap_word bit;	/* full word: the mask must not truncate */
	int idx, tid;

	td = (struct thread *)mem;

	if (td->td_tid > PID_MAX) {
		/* Return the TID to the bitmap part it came from. */
		STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
			if (td->td_tid >= bmp->bmp_base &&
			    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
				break;
		}
		KASSERT(bmp != NULL, ("No TID bitmap?"));
		mtx_lock(&tid_lock);
		tid = td->td_tid - bmp->bmp_base;
		idx = tid / TID_IDS_PER_IDX;
		bit = 1UL << (tid % TID_IDS_PER_IDX);
		bmp->bmp_bitmap[idx] |= bit;
		bmp->bmp_free++;
		mtx_unlock(&tid_lock);
	}

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}
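/*
 * Note on the UMA lifecycle (sketch): the ctor/dtor above run on every
 * allocation and free from thread_zone, while the init/fini routines
 * below run only when items move between the zone and the VM system.
 * The expensive, type-stable state (kernel stack, sleep queue,
 * turnstile) is therefore set up once per item and recycled across
 * many thread lifetimes.
 */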
/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}
/*
 * Link a KSE into its ksegrp.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
	ke->ke_flags = 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	kg->kg_idle_kses = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));
	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = NULL;
	}
}
/*
 * For a newly created process, link up all the structures and its
 * initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

#ifndef _SYS_SYSPROTO_H_
struct kse_switchin_args {
	const struct __mcontext *mcp;
	long val;
	long *loc;
};
#endif

int
kse_switchin(struct thread *td, struct kse_switchin_args *uap)
{
	mcontext_t mc;
	int error;

	error = (uap->mcp == NULL) ? EINVAL : 0;
	if (!error)
		error = copyin(uap->mcp, &mc, sizeof(mc));
	if (!error && uap->loc != NULL)
		error = (suword(uap->loc, uap->val) != 0) ? EINVAL : 0;
	if (!error)
		error = set_mcontext(td, &mc);
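	/*
	 * Note: EJUSTRETURN tells the syscall return path not to write
	 * the usual return registers, so the context just installed by
	 * set_mcontext() reaches userland unmodified.
	 */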
	return ((error == 0) ? EJUSTRETURN : error);
}

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox * tmbx;
	int cmd;
	long data;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;

	if (!(p->p_flag & P_SA))
		return (EINVAL);

	switch (uap->cmd) {
	case KSE_INTR_SENDSIG:
		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		/* FALLTHROUGH */
	case KSE_INTR_INTERRUPT:
	case KSE_INTR_RESTART:
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2->td_mailbox == uap->tmbx)
				break;
		}
		if (td2 == NULL) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (ESRCH);
		}
		if (uap->cmd == KSE_INTR_SENDSIG) {
			if (uap->data > 0) {
				td2->td_flags &= ~TDF_INTERRUPT;
				mtx_unlock_spin(&sched_lock);
				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
			} else {
				mtx_unlock_spin(&sched_lock);
			}
		} else {
			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
			if (TD_CAN_UNBIND(td2))
				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
			if (uap->cmd == KSE_INTR_INTERRUPT)
				td2->td_intrval = EINTR;
			else
				td2->td_intrval = ERESTART;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
				sleepq_abort(td2);
			mtx_unlock_spin(&sched_lock);
		}
		PROC_UNLOCK(p);
		break;
	case KSE_INTR_SIGEXIT:
		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		PROC_LOCK(p);
		sigexit(td, (int)uap->data);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
	/*
	 * The last non-exiting upcall in the group may not leave while
	 * other threads in the group still exist; they would have no
	 * upcall left to report back through.
	 */
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_SA;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) {	/* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}
/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct timespec timeout;
	struct timeval tv;
	sigset_t sigset;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
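	/*
	 * Note: a NULL timeout means the msleep() calls below are given
	 * a timo of 0 and sleep until explicitly awakened; otherwise
	 * the timespec is converted to clock ticks with tvtohz().
	 */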
	if (td->td_flags & TDF_SA)
		td->td_pflags |= TDP_UPCALLING;
	else {
		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
		if (ku->ku_mflags == -1) {
			PROC_LOCK(p);
			sigexit(td, SIGSEGV);
		}
	}
	PROC_LOCK(p);
	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
		/* UTS wants to wait for signal event */
		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
		p->p_flag &= ~P_SIGEVENT;
		sigset = p->p_siglist;
		PROC_UNLOCK(p);
		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
		    sizeof(sigset));
	} else {
		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
			kg->kg_upsleeps++;
			error = msleep(&kg->kg_completed, &p->p_mtx,
			    PPAUSE|PCATCH, "kserel",
			    (uap->timeout ? tvtohz(&tv) : 0));
			kg->kg_upsleeps--;
		}
		PROC_UNLOCK(p);
	}
	if (ku->ku_flags & KUF_DOUPCALL) {
		mtx_lock_spin(&sched_lock);
		ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_SA))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) &&
		    ((td2->td_wchan == &kg->kg_completed) ||
		     (td2->td_wchan == &p->p_siglist &&
		      (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
			sleepq_abort(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}
/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall.
 * In all other situations, allocate the maximum number of new KSEs
 * and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus, sa = 0, first = 0;
	struct thread *newtd;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't have a generic cpu counter. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (virtual_cpu != 0)
		ncpus = virtual_cpu;
	if (!(mbx.km_flags & KMF_BOUND))
		sa = TDF_SA;
	else
		ncpus = 1;
	PROC_LOCK(p);
	if (!(p->p_flag & P_SA)) {
		first = 1;
		p->p_flag |= P_SA;
	}
	PROC_UNLOCK(p);
	if (!sa && !uap->newgroup && !first)
		return (EINVAL);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race here, but the check is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		    kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		    RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		sched_fork_ksegrp(kg, newkg);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
			return (EINVAL);
		newkg = kg;
	}

	/*
	 * Creating more upcalls than there are physical cpus does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group.
		 *
		 * For a multiplexed group, create as many KSEs as there
		 * are physical cpus.  This increases concurrency in the
		 * kernel even if the userland is not MP safe and can only
		 * run on a single CPU.  Ideally, every physical cpu should
		 * execute a thread.  With enough KSEs, threads in the
		 * kernel can execute in parallel on different cpus at
		 * full speed; concurrency in the kernel shouldn't be
		 * limited by the number of upcalls userland provides.
		 * Adding more upcall structures only increases concurrency
		 * in userland.
		 *
		 * For a bound thread group, because there is only one
		 * thread in the group, we only create one KSE for the
		 * group.  A thread in this kind of group will never
		 * schedule an upcall when blocked; this is intended to
		 * simulate a pthread system-scope thread.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			    ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			    RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			sched_fork_kse(td->td_kse, newke);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	PROC_LOCK(p);
	if (newkg->kg_numupcalls >= ncpus) {
		PROC_UNLOCK(p);
		upcall_free(newku);
		return (EPROCLIM);
	}
	if (first && sa) {
		SIGSETOR(p->p_siglist, td->td_siglist);
		SIGEMPTYSET(td->td_siglist);
		SIGFILLSET(td->td_sigmask);
		SIG_CANTMASK(td->td_sigmask);
	}
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	upcall_link(newku, newkg);
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread, find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread yet,
		 * create an initial upcall thread to own it.
		 */
		newtd = thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread has no upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
			newtd = td;
		} else {
			/*
			 * Otherwise create a new upcall thread to own it.
			 */
			newtd = thread_schedule_upcall(td, newku);
		}
	}
	if (!sa) {
		newtd->td_mailbox = mbx.km_curthread;
		newtd->td_flags &= ~TDF_SA;
		if (newtd != td) {
			mtx_unlock_spin(&sched_lock);
			cpu_set_upcall_kse(newtd, newku);
			mtx_lock_spin(&sched_lock);
		}
	} else {
		newtd->td_flags |= TDF_SA;
	}
	if (newtd != td)
		setrunqueue(newtd);
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}
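/*
 * The zombie queues implement deferred freeing: an exiting thread
 * cannot free its own structures while it is still running on them,
 * so they are parked on these lists and reclaimed later by
 * thread_reap() (e.g. from thread_alloc()).
 */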
/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */
void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant,
	 * we really don't care about the next instant..
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();			/* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}
/*
 * Assign a thread ID.
 */
int
thread_new_tid(void)
{
	struct tid_bitmap_part *bmp, *new;
	int bit, idx, tid;

	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);

	if (new != NULL)
		uma_zfree(tid_zone, new);
	return (tid);
}
/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td, int willexit)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error = 0, temp, sig;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Export clock ticks spent in kernel mode. */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword32(addr) + td->td_usticks;
	if (suword32(addr, temp)) {
		error = EFAULT;
		goto bad;
	}

	/*
	 * Post sync signal, or process SIGKILL and SIGSTOP.
	 * For sync signal, it is only possible when the signal is not
	 * caught by userland or process is being debugged.
	 */
	PROC_LOCK(p);
	if (td->td_flags & TDF_NEEDSIGCHK) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_NEEDSIGCHK;
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
	}
	if (willexit)
		SIGFILLSET(td->td_sigmask);
	PROC_UNLOCK(p);

	/* Get address in latest mbox of list pointer */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads when we unlock the
			 * process lock.  It is no longer valid to
			 * use it anywhere else.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	sigexit(td, SIGILL);
	return (error);
}
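/*
 * Both thread_export_context() above and thread_link_mboxes() below
 * use the same retry pattern to push onto a list whose link words
 * live in userland: write the link with suword() while unlocked
 * (userland access may fault or sleep, so no lock may be held), then
 * re-check the list anchor under the proc lock and loop if another
 * thread changed it in the meantime.
 */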
/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * upcall's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

	addr = (void *)(&ku->ku_mailbox->km_completed);
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_statclock(int user)
{
	struct thread *td = curthread;
	struct ksegrp *kg = td->td_ksegrp;

	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
		return (0);
	if (user) {
		/* Currently this is always done via ast(). */
		mtx_lock_spin(&sched_lock);
		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
		mtx_unlock_spin(&sched_lock);
		td->td_uuticks++;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks++;
		else {
			/* XXXKSE
			 * We will call thread_user_enter() for every
			 * kernel entry in the future, so if the thread
			 * mailbox is NULL it must be the UTS kernel;
			 * don't account clock ticks for it.
			 */
		}
	}
	return (0);
}

/*
 * Export stat clock ticks to userland.
 */
static int
thread_update_usr_ticks(struct thread *td, int user)
{
	struct proc *p = td->td_proc;
	struct kse_thr_mailbox *tmbx;
	struct kse_upcall *ku;
	struct ksegrp *kg;
	caddr_t addr;
	u_int uticks;

	if ((ku = td->td_upcall) == NULL)
		return (-1);

	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (-1);
	if (user) {
		uticks = td->td_uuticks;
		td->td_uuticks = 0;
		addr = (caddr_t)&tmbx->tm_uticks;
	} else {
		uticks = td->td_usticks;
		td->td_usticks = 0;
		addr = (caddr_t)&tmbx->tm_sticks;
	}
	if (uticks) {
		if (suword32(addr, uticks+fuword32(addr))) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (-2);
		}
	}
	kg = td->td_ksegrp;
	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
		mtx_lock_spin(&sched_lock);
		td->td_upcall->ku_flags |= KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}
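/*
 * To summarize the tick plumbing: thread_statclock() above only buffers
 * counts in td_uuticks (user) and td_usticks (system); they are flushed
 * into the thread mailbox by thread_update_usr_ticks() and
 * thread_export_context() with a fuword32()/suword32() read-modify-write,
 * so userland always sees cumulative totals.
 */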
/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	mtx_assert(&Giant, MA_NOTOWNED);

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled.  Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread.  P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and the owner thread exits only when the process is
		 * exiting, an upcall to userland is no longer needed
		 * and deleting the upcall structure is safe here.
		 * So when all threads in a group have exited, all
		 * upcalls in the group should be automatically freed.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
				ksegrp_unlink(kg);
			}
		}
		else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse = NULL;
		td->td_state = TDS_INACTIVE;
#if 0
		td->td_proc = NULL;
#endif
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}
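/*
 * thread_link() and thread_unlink() below are plain <sys/queue.h> tail
 * queue manipulation.  For reference, the same pattern reduced to a
 * standalone program (structure and field names are illustrative; kept
 * under #if 0):
 */
#if 0
#include <stdio.h>
#include <sys/queue.h>

struct node {
	int			n_id;
	TAILQ_ENTRY(node)	n_link;
};

TAILQ_HEAD(nodeq, node);

int
main(void)
{
	struct nodeq head = TAILQ_HEAD_INITIALIZER(head);
	struct node a = { 1 }, b = { 2 }, *np;

	TAILQ_INSERT_HEAD(&head, &a, n_link);
	TAILQ_INSERT_HEAD(&head, &b, n_link);	/* list is now: 2, 1 */
	TAILQ_REMOVE(&head, &a, n_link);	/* list is now: 2 */
	TAILQ_FOREACH(np, &head, n_link)
		printf("%d\n", np->n_id);
	return (0);
}
#endif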
/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;
	td->td_flags = 0;
	td->td_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp resource.  When a ksegrp is preparing to
 * exit, it calls this function.
 */
static void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
		    ("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
	    ("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	    ("%s: ksegrp still has %d upcall datas",
	    __func__, kg->kg_numupcalls));
}

/*
 * Purge a process's KSE resource.  When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
static void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * There is no ownership for a KSE; after all threads
		 * in the group have exited, it is possible that some
		 * KSEs were left on the idle queue, so gc them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			    ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		    ("%s: ksegrp still has %d upcall datas",
		    __func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
}

/*
 * This function is intended to be used to initialize a spare thread
 * for upcall.  Initialize the thread's large data area outside sched_lock
 * for thread_schedule_upcall().
 */
void
thread_alloc_spare(struct thread *td, struct thread *spare)
{

	if (td->td_standin)
		return;
	if (spare == NULL) {
		spare = thread_alloc();
		spare->td_tid = thread_new_tid();
	}
	td->td_standin = spare;
	bzero(&spare->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	spare->td_proc = td->td_proc;
	spare->td_ucred = crhold(td->td_ucred);
}
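/*
 * The standin allocated above is what makes thread_schedule_upcall()
 * below callable under sched_lock: the expensive allocation and setup
 * happen here, in advance, so the scheduling path never has to call the
 * allocator with a spin lock held.
 */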
/*
 * Create a thread and schedule it for upcall on the KSE given.
 * Use our thread's standin so that we don't have to allocate one.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
{
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Schedule an upcall thread on the specified kse_upcall;
	 * the kse_upcall must be free.
	 * td must have a spare thread.
	 */
	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		panic("no reserve thread when scheduling an upcall");
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ku->ku_ksegrp);
	/* inherit the blocked thread's context */
	cpu_set_upcall(td2, td);
	/* Let the new thread become owner of the upcall */
	ku->ku_owner = td2;
	td2->td_upcall = ku;
	td2->td_flags = TDF_SA;
	td2->td_pflags = TDP_UPCALLING;
	td2->td_kse = NULL;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	SIGFILLSET(td2->td_sigmask);
	SIG_CANTMASK(td2->td_sigmask);
	sched_fork_thread(td, td2);
	return (td2);	/* bogus.. should be a void function */
}
/*
 * It is only used when a thread has generated a trap and the process is
 * being debugged.
 */
void
thread_signal_add(struct thread *td, int sig)
{
	struct proc *p;
	siginfo_t siginfo;
	struct sigacts *ps;
	int error;

	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	ps = p->p_sigacts;
	mtx_assert(&ps->ps_mtx, MA_OWNED);

	cpu_thread_siginfo(sig, 0, &siginfo);
	mtx_unlock(&ps->ps_mtx);
	PROC_UNLOCK(p);
	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
	if (error) {
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}
	PROC_LOCK(p);
	SIGADDSET(td->td_sigmask, sig);
	mtx_lock(&ps->ps_mtx);
}

void
thread_switchout(struct thread *td)
{
	struct kse_upcall *ku;
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * If the outgoing thread is in a threaded group and has never
	 * scheduled an upcall, decide whether this is a short
	 * or long term event and thus whether or not to schedule
	 * an upcall.
	 * If it is a short term event, just suspend it in
	 * a way that takes its KSE with it.
	 * Select the events for which we want to schedule upcalls.
	 * For now it's just sleep.
	 * XXXKSE eventually almost any inhibition could do.
	 */
	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
		/*
		 * Release ownership of the upcall, and schedule an upcall
		 * thread; this new upcall thread becomes the owner of
		 * the upcall structure.
		 */
		ku = td->td_upcall;
		ku->ku_owner = NULL;
		td->td_upcall = NULL;
		td->td_flags &= ~TDF_CAN_UNBIND;
		td2 = thread_schedule_upcall(td, ku);
		setrunqueue(td2);
	}
}
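/*
 * Note the ownership discipline above: a kse_upcall has at most one
 * owner at any instant.  thread_switchout() first clears ku_owner and
 * td_upcall, and only then lets thread_schedule_upcall() assign the
 * structure to the new upcall thread, so ownership is handed over
 * rather than shared.
 */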
/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct kse_thr_mailbox *tmbx;
	uint32_t tflags;

	kg = td->td_ksegrp;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 */
	if (p->p_flag & P_SINGLE_EXIT) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is.  There is always the
	 * possibility that we could do this lazily (in kse_reassign()),
	 * but for now do it every time.
	 */
	kg = td->td_ksegrp;
	if (td->td_flags & TDF_SA) {
		ku = td->td_upcall;
		KASSERT(ku, ("%s: no upcall owned", __func__));
		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
		if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
		    (ku->ku_mflags & KMF_NOUPCALL)) {
			td->td_mailbox = NULL;
		} else {
			if (td->td_standin == NULL)
				thread_alloc_spare(td, NULL);
			tflags = fuword32(&tmbx->tm_flags);
			/*
			 * On some architectures the TP register points
			 * to the thread mailbox rather than to the kse
			 * mailbox; userland then cannot atomically clear
			 * km_curthread, but it can use the TP register
			 * and set TMF_NOUPCALL in the thread flags to
			 * indicate a critical region.
			 */
			if (tflags & TMF_NOUPCALL) {
				td->td_mailbox = NULL;
			} else {
				td->td_mailbox = tmbx;
				mtx_lock_spin(&sched_lock);
				td->td_flags |= TDF_CAN_UNBIND;
				mtx_unlock_spin(&sched_lock);
			}
		}
	}
}
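/*
 * In short: on each kernel entry a TDF_SA thread re-reads km_curthread
 * from its upcall mailbox.  A NULL or invalid pointer, KMF_NOUPCALL, or
 * TMF_NOUPCALL leaves td_mailbox NULL and the thread bound; otherwise
 * the thread records its mailbox and sets TDF_CAN_UNBIND, making it a
 * candidate for thread_switchout() above.
 */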
/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish.  Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error = 0, upcalls, uts_crit;
	struct kse_upcall *ku;
	struct ksegrp *kg, *kg2;
	struct proc *p;
	struct timespec ts;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ku = td->td_upcall;

	/* Nothing to do with a bound thread */
	if (!(td->td_flags & TDF_SA))
		return (0);

	/*
	 * A stat clock interrupt hit in userland and we are
	 * returning from the interrupt; charge the thread's
	 * userland time to the UTS.
	 */
	if (td->td_flags & TDF_USTATCLOCK) {
		thread_update_usr_ticks(td, 1);
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_USTATCLOCK;
		mtx_unlock_spin(&sched_lock);
		if (kg->kg_completed ||
		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
			thread_user_enter(p, td);
	}

	uts_crit = (td->td_mailbox == NULL);
	/*
	 * Optimisation:
	 * This thread has not started any upcall.
	 * If there is no work to report other than ourself,
	 * then it can return direct to userland.
	 */
	if (TD_CAN_UNBIND(td)) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_CAN_UNBIND;
		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
		    (kg->kg_completed == NULL) &&
		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
			mtx_unlock_spin(&sched_lock);
			thread_update_usr_ticks(td, 0);
			nanotime(&ts);
			error = copyout(&ts,
			    (caddr_t)&ku->ku_mailbox->km_timeofday,
			    sizeof(ts));
			td->td_mailbox = 0;
			ku->ku_mflags = 0;
			if (error)
				goto out;
			return (0);
		}
		mtx_unlock_spin(&sched_lock);
		thread_export_context(td, 0);
		/*
		 * There is something to report, and we own an upcall
		 * structure; we can go to userland.
		 * Turn ourself into an upcall thread.
		 */
		td->td_pflags |= TDP_UPCALLING;
	} else if (td->td_mailbox && (ku == NULL)) {
		thread_export_context(td, 1);
		PROC_LOCK(p);
		/*
		 * There are upcall threads waiting for
		 * work to do, wake one of them up.
		 * XXXKSE Maybe wake all of them up.
		 */
		if (kg->kg_upsleeps)
			wakeup_one(&kg->kg_completed);
		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}

	KASSERT(ku != NULL, ("upcall is NULL\n"));
	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));

	if (p->p_numthreads > max_threads_per_proc) {
		max_threads_hits++;
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		p->p_maxthrwaits++;
		while (p->p_numthreads > max_threads_per_proc) {
			upcalls = 0;
			FOREACH_KSEGRP_IN_PROC(p, kg2) {
				if (kg2->kg_numupcalls == 0)
					upcalls++;
				else
					upcalls += kg2->kg_numupcalls;
			}
			if (upcalls >= max_threads_per_proc)
				break;
			mtx_unlock_spin(&sched_lock);
			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
			    "maxthreads", 0)) {
				mtx_lock_spin(&sched_lock);
				break;
			} else {
				mtx_lock_spin(&sched_lock);
			}
		}
		p->p_maxthrwaits--;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}

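	/*
	 * At this point the thread either still owns an upcall structure
	 * (and may have TDP_UPCALLING set) or has exited above; the code
	 * below turns it into the upcall, handing the completed-context
	 * list and a fresh timestamp to the UTS.
	 */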
	if (td->td_pflags & TDP_UPCALLING) {
		uts_crit = 0;
		kg->kg_nextupcall = ticks+kg->kg_upquantum;
		/*
		 * There is no more work to do and we are going to ride
		 * this thread up to userland as an upcall.
		 * Do the last parts of the setup needed for the upcall.
		 */
		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);

		td->td_pflags &= ~TDP_UPCALLING;
		if (ku->ku_flags & KUF_DOUPCALL) {
			mtx_lock_spin(&sched_lock);
			ku->ku_flags &= ~KUF_DOUPCALL;
			mtx_unlock_spin(&sched_lock);
		}
		/*
		 * Set the user context to the UTS.
		 */
		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
			cpu_set_upcall_kse(td, ku);
			error = suword(&ku->ku_mailbox->km_curthread, 0);
			if (error)
				goto out;
		}

		/*
		 * Unhook the list of completed threads.
		 * Anything that completes after this gets to
		 * come in next time.
		 * Put the list of completed thread mailboxes on
		 * this KSE's mailbox.
		 */
		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
		    (error = thread_link_mboxes(kg, ku)) != 0)
			goto out;
	}
	if (!uts_crit) {
		nanotime(&ts);
		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
	}

out:
	if (error) {
		/*
		 * Things are going to be so screwed we should just kill
		 * the process.
		 * How do we do that?
		 */
		PROC_LOCK(td->td_proc);
		psignal(td->td_proc, SIGSEGV);
		PROC_UNLOCK(td->td_proc);
	} else {
		/*
		 * Optimisation:
		 * Ensure that we have a spare thread available,
		 * for when we re-enter the kernel.
		 */
		if (td->td_standin == NULL)
			thread_alloc_spare(td, NULL);
	}

	ku->ku_mflags = 0;
	/*
	 * Clear the thread mailbox first, then clear the system tick count.
	 * The order is important because thread_statclock() uses the
	 * mailbox pointer to see if it is a userland thread or
	 * a UTS kernel thread.
	 */
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single-threaded in the suspend mode when
 * there are no threads in user mode.  Threads in the kernel must be
 * allowed to continue until they get to the user boundary.  They may even
 * copy out their return values and data before suspending.  They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						sleepq_abort(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall)
			upcall_remove(td);
		kse_purge(p, td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}
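/*
 * Sketch of the expected caller pattern for thread_suspend_check()
 * below (illustrative only; the error value and locking context are
 * assumptions, cf. the userret() path):
 *
 *	PROC_LOCK(p);
 *	if (thread_suspend_check(1)) {
 *		PROC_UNLOCK(p);
 *		return (EINTR);		(caller must back out)
 *	}
 *	... continue with the process locked ...
 *	PROC_UNLOCK(p);
 */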
/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading.  Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			if (p->p_flag & P_SA)
				thread_exit();
			else
				thr_exit1();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent.  We
	 * want to look like we have two inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request.  Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process.  The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}
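/*
 * Putting the single-threading API together, a caller is expected to
 * drive it roughly as follows (sketch only; SINGLE_NO_EXIT per proc.h
 * of this vintage, error handling abbreviated):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_NO_EXIT)) {
 *		PROC_UNLOCK(p);
 *		return (ERESTART);	(lost the race to single-thread)
 *	}
 *	... all other threads are now held at the user boundary ...
 *	thread_single_end();
 *	PROC_UNLOCK(p);
 */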