19454b2d8SWarner Losh /*- 28a36da99SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 38a36da99SPedro F. Giffuni * 444990b8cSJulian Elischer * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>. 544990b8cSJulian Elischer * All rights reserved. 644990b8cSJulian Elischer * 744990b8cSJulian Elischer * Redistribution and use in source and binary forms, with or without 844990b8cSJulian Elischer * modification, are permitted provided that the following conditions 944990b8cSJulian Elischer * are met: 1044990b8cSJulian Elischer * 1. Redistributions of source code must retain the above copyright 1144990b8cSJulian Elischer * notice(s), this list of conditions and the following disclaimer as 1244990b8cSJulian Elischer * the first lines of this file unmodified other than the possible 1344990b8cSJulian Elischer * addition of one or more copyright notices. 1444990b8cSJulian Elischer * 2. Redistributions in binary form must reproduce the above copyright 1544990b8cSJulian Elischer * notice(s), this list of conditions and the following disclaimer in the 1644990b8cSJulian Elischer * documentation and/or other materials provided with the distribution. 1744990b8cSJulian Elischer * 1844990b8cSJulian Elischer * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY 1944990b8cSJulian Elischer * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 2044990b8cSJulian Elischer * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 2144990b8cSJulian Elischer * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 2244990b8cSJulian Elischer * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 2344990b8cSJulian Elischer * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 2444990b8cSJulian Elischer * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 2544990b8cSJulian Elischer * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2644990b8cSJulian Elischer * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2744990b8cSJulian Elischer * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 2844990b8cSJulian Elischer * DAMAGE. 2944990b8cSJulian Elischer */ 3044990b8cSJulian Elischer 313d06b4b3SAttilio Rao #include "opt_witness.h" 3216d95d4fSJoseph Koshy #include "opt_hwpmc_hooks.h" 333d06b4b3SAttilio Rao 34677b542eSDavid E. O'Brien #include <sys/cdefs.h> 35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 36677b542eSDavid E. 
O'Brien 3744990b8cSJulian Elischer #include <sys/param.h> 3844990b8cSJulian Elischer #include <sys/systm.h> 3944990b8cSJulian Elischer #include <sys/kernel.h> 4044990b8cSJulian Elischer #include <sys/lock.h> 415dda15adSMark Johnston #include <sys/msan.h> 4244990b8cSJulian Elischer #include <sys/mutex.h> 4344990b8cSJulian Elischer #include <sys/proc.h> 4435bb59edSMateusz Guzik #include <sys/bitstring.h> 456febf180SGleb Smirnoff #include <sys/epoch.h> 468f0e9130SKonstantin Belousov #include <sys/rangelock.h> 47e170bfdaSDavid Xu #include <sys/resourcevar.h> 48b3e9e682SRyan Stone #include <sys/sdt.h> 4994e0a4cdSJulian Elischer #include <sys/smp.h> 50de028f5aSJeff Roberson #include <sys/sched.h> 5144f3b092SJohn Baldwin #include <sys/sleepqueue.h> 52ace8398dSJeff Roberson #include <sys/selinfo.h> 53d1e7a4a5SJohn Baldwin #include <sys/syscallsubr.h> 54598f2b81SMateusz Guzik #include <sys/dtrace_bsd.h> 5591d1786fSDmitry Chagin #include <sys/sysent.h> 56961a7b24SJohn Baldwin #include <sys/turnstile.h> 57d116b9f1SMateusz Guzik #include <sys/taskqueue.h> 5844990b8cSJulian Elischer #include <sys/ktr.h> 59cf7d9a8cSDavid Xu #include <sys/rwlock.h> 60af29f399SDmitry Chagin #include <sys/umtxvar.h> 619ed01c32SGleb Smirnoff #include <sys/vmmeter.h> 62d7f687fcSJeff Roberson #include <sys/cpuset.h> 6316d95d4fSJoseph Koshy #ifdef HWPMC_HOOKS 6416d95d4fSJoseph Koshy #include <sys/pmckern.h> 6516d95d4fSJoseph Koshy #endif 661bd3cf5dSMateusz Guzik #include <sys/priv.h> 6744990b8cSJulian Elischer 68911b84b0SRobert Watson #include <security/audit/audit.h> 69911b84b0SRobert Watson 70d116b9f1SMateusz Guzik #include <vm/pmap.h> 7144990b8cSJulian Elischer #include <vm/vm.h> 7249a2507bSAlan Cox #include <vm/vm_extern.h> 7344990b8cSJulian Elischer #include <vm/uma.h> 74d116b9f1SMateusz Guzik #include <vm/vm_phys.h> 75b209f889SRandall Stewart #include <sys/eventhandler.h> 7602fb42b0SPeter Wemm 77acd9f517SKonstantin Belousov /* 78acd9f517SKonstantin Belousov * Asserts below verify the stability of 
struct thread and struct proc 79acd9f517SKonstantin Belousov * layout, as exposed by KBI to modules. On head, the KBI is allowed 80acd9f517SKonstantin Belousov * to drift, change to the structures must be accompanied by the 81acd9f517SKonstantin Belousov * assert update. 82acd9f517SKonstantin Belousov * 83acd9f517SKonstantin Belousov * On the stable branches after KBI freeze, conditions must not be 84acd9f517SKonstantin Belousov * violated. Typically new fields are moved to the end of the 85acd9f517SKonstantin Belousov * structures. 86acd9f517SKonstantin Belousov */ 87acd9f517SKonstantin Belousov #ifdef __amd64__ 88a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_flags) == 0x108, 89acd9f517SKonstantin Belousov "struct thread KBI td_flags"); 90a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_pflags) == 0x110, 91acd9f517SKonstantin Belousov "struct thread KBI td_pflags"); 92a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_frame) == 0x4a8, 93acd9f517SKonstantin Belousov "struct thread KBI td_frame"); 94bd9e0f5dSKonstantin Belousov _Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, 95acd9f517SKonstantin Belousov "struct thread KBI td_emuldata"); 9685078b85SConrad Meyer _Static_assert(offsetof(struct proc, p_flag) == 0xb8, 97acd9f517SKonstantin Belousov "struct proc KBI p_flag"); 9885078b85SConrad Meyer _Static_assert(offsetof(struct proc, p_pid) == 0xc4, 99acd9f517SKonstantin Belousov "struct proc KBI p_pid"); 100351d5f7fSKonstantin Belousov _Static_assert(offsetof(struct proc, p_filemon) == 0x3c8, 101acd9f517SKonstantin Belousov "struct proc KBI p_filemon"); 102351d5f7fSKonstantin Belousov _Static_assert(offsetof(struct proc, p_comm) == 0x3e0, 103acd9f517SKonstantin Belousov "struct proc KBI p_comm"); 104351d5f7fSKonstantin Belousov _Static_assert(offsetof(struct proc, p_emuldata) == 0x4c8, 105acd9f517SKonstantin Belousov "struct proc KBI p_emuldata"); 106acd9f517SKonstantin Belousov #endif 
107acd9f517SKonstantin Belousov #ifdef __i386__ 108a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_flags) == 0x9c, 109acd9f517SKonstantin Belousov "struct thread KBI td_flags"); 110a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_pflags) == 0xa4, 111acd9f517SKonstantin Belousov "struct thread KBI td_pflags"); 112a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_frame) == 0x308, 113acd9f517SKonstantin Belousov "struct thread KBI td_frame"); 114a422084aSMark Johnston _Static_assert(offsetof(struct thread, td_emuldata) == 0x34c, 115acd9f517SKonstantin Belousov "struct thread KBI td_emuldata"); 11685078b85SConrad Meyer _Static_assert(offsetof(struct proc, p_flag) == 0x6c, 117acd9f517SKonstantin Belousov "struct proc KBI p_flag"); 11885078b85SConrad Meyer _Static_assert(offsetof(struct proc, p_pid) == 0x78, 119acd9f517SKonstantin Belousov "struct proc KBI p_pid"); 1204d675b80SKonstantin Belousov _Static_assert(offsetof(struct proc, p_filemon) == 0x270, 121acd9f517SKonstantin Belousov "struct proc KBI p_filemon"); 1224d675b80SKonstantin Belousov _Static_assert(offsetof(struct proc, p_comm) == 0x284, 123acd9f517SKonstantin Belousov "struct proc KBI p_comm"); 1244d675b80SKonstantin Belousov _Static_assert(offsetof(struct proc, p_emuldata) == 0x310, 125acd9f517SKonstantin Belousov "struct proc KBI p_emuldata"); 126acd9f517SKonstantin Belousov #endif 127acd9f517SKonstantin Belousov 128b3e9e682SRyan Stone SDT_PROVIDER_DECLARE(proc); 129d9fae5abSAndriy Gapon SDT_PROBE_DEFINE(proc, , , lwp__exit); 130b3e9e682SRyan Stone 1318460a577SJohn Birrell /* 1328460a577SJohn Birrell * thread related storage. 
1338460a577SJohn Birrell */ 13444990b8cSJulian Elischer static uma_zone_t thread_zone; 13544990b8cSJulian Elischer 136d116b9f1SMateusz Guzik struct thread_domain_data { 137d116b9f1SMateusz Guzik struct thread *tdd_zombies; 138d116b9f1SMateusz Guzik int tdd_reapticks; 139d116b9f1SMateusz Guzik } __aligned(CACHE_LINE_SIZE); 140d116b9f1SMateusz Guzik 141d116b9f1SMateusz Guzik static struct thread_domain_data thread_domain_data[MAXMEMDOM]; 142d116b9f1SMateusz Guzik 143d116b9f1SMateusz Guzik static struct task thread_reap_task; 144d116b9f1SMateusz Guzik static struct callout thread_reap_callout; 14544990b8cSJulian Elischer 146ff8fbcffSJeff Roberson static void thread_zombie(struct thread *); 147b83e94beSMateusz Guzik static void thread_reap(void); 148d116b9f1SMateusz Guzik static void thread_reap_all(void); 149d116b9f1SMateusz Guzik static void thread_reap_task_cb(void *, int); 150d116b9f1SMateusz Guzik static void thread_reap_callout_cb(void *); 15184cdea97SKonstantin Belousov static int thread_unsuspend_one(struct thread *td, struct proc *p, 15284cdea97SKonstantin Belousov bool boundary); 153755341dfSMateusz Guzik static void thread_free_batched(struct thread *td); 154ff8fbcffSJeff Roberson 155d1ca25beSMateusz Guzik static __exclusive_cache_line struct mtx tid_lock; 156934e7e5eSMateusz Guzik static bitstr_t *tid_bitmap; 15735bb59edSMateusz Guzik 158cf7d9a8cSDavid Xu static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash"); 159cf7d9a8cSDavid Xu 1601bd3cf5dSMateusz Guzik static int maxthread; 1611bd3cf5dSMateusz Guzik SYSCTL_INT(_kern, OID_AUTO, maxthread, CTLFLAG_RDTUN, 1621bd3cf5dSMateusz Guzik &maxthread, 0, "Maximum number of threads"); 1631bd3cf5dSMateusz Guzik 16462dbc992SMateusz Guzik static __exclusive_cache_line int nthreads; 1651bd3cf5dSMateusz Guzik 166aae3547bSMateusz Guzik static LIST_HEAD(tidhashhead, thread) *tidhashtbl; 167aae3547bSMateusz Guzik static u_long tidhash; 16826007fe3SMateusz Guzik static u_long tidhashlock; 16926007fe3SMateusz Guzik static 
struct rwlock *tidhashtbl_lock; 170aae3547bSMateusz Guzik #define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash]) 17126007fe3SMateusz Guzik #define TIDHASHLOCK(tid) (&tidhashtbl_lock[(tid) & tidhashlock]) 172cf7d9a8cSDavid Xu 1732ca45184SMatt Joras EVENTHANDLER_LIST_DEFINE(thread_ctor); 1742ca45184SMatt Joras EVENTHANDLER_LIST_DEFINE(thread_dtor); 1752ca45184SMatt Joras EVENTHANDLER_LIST_DEFINE(thread_init); 1762ca45184SMatt Joras EVENTHANDLER_LIST_DEFINE(thread_fini); 1772ca45184SMatt Joras 17862dbc992SMateusz Guzik static bool 179d116b9f1SMateusz Guzik thread_count_inc_try(void) 180ec6ea5e8SDavid Xu { 18162dbc992SMateusz Guzik int nthreads_new; 182ec6ea5e8SDavid Xu 18362dbc992SMateusz Guzik nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; 18462dbc992SMateusz Guzik if (nthreads_new >= maxthread - 100) { 1851bd3cf5dSMateusz Guzik if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || 18662dbc992SMateusz Guzik nthreads_new >= maxthread) { 18762dbc992SMateusz Guzik atomic_subtract_int(&nthreads, 1); 188d116b9f1SMateusz Guzik return (false); 189d116b9f1SMateusz Guzik } 190d116b9f1SMateusz Guzik } 191d116b9f1SMateusz Guzik return (true); 192d116b9f1SMateusz Guzik } 193d116b9f1SMateusz Guzik 194d116b9f1SMateusz Guzik static bool 195d116b9f1SMateusz Guzik thread_count_inc(void) 196d116b9f1SMateusz Guzik { 197d116b9f1SMateusz Guzik static struct timeval lastfail; 198d116b9f1SMateusz Guzik static int curfail; 199d116b9f1SMateusz Guzik 200d116b9f1SMateusz Guzik thread_reap(); 201d116b9f1SMateusz Guzik if (thread_count_inc_try()) { 202d116b9f1SMateusz Guzik return (true); 203d116b9f1SMateusz Guzik } 204d116b9f1SMateusz Guzik 205d116b9f1SMateusz Guzik thread_reap_all(); 206d116b9f1SMateusz Guzik if (thread_count_inc_try()) { 207d116b9f1SMateusz Guzik return (true); 208d116b9f1SMateusz Guzik } 209d116b9f1SMateusz Guzik 2101bd3cf5dSMateusz Guzik if (ppsratecheck(&lastfail, &curfail, 1)) { 2111bd3cf5dSMateusz Guzik printf("maxthread limit exceeded by uid %u " 
2121bd3cf5dSMateusz Guzik "(pid %d); consider increasing kern.maxthread\n", 2131bd3cf5dSMateusz Guzik curthread->td_ucred->cr_ruid, curproc->p_pid); 2141bd3cf5dSMateusz Guzik } 21562dbc992SMateusz Guzik return (false); 2161bd3cf5dSMateusz Guzik } 2171bd3cf5dSMateusz Guzik 21862dbc992SMateusz Guzik static void 21962dbc992SMateusz Guzik thread_count_sub(int n) 22062dbc992SMateusz Guzik { 22162dbc992SMateusz Guzik 22262dbc992SMateusz Guzik atomic_subtract_int(&nthreads, n); 22362dbc992SMateusz Guzik } 22462dbc992SMateusz Guzik 22562dbc992SMateusz Guzik static void 22662dbc992SMateusz Guzik thread_count_dec(void) 22762dbc992SMateusz Guzik { 22862dbc992SMateusz Guzik 22962dbc992SMateusz Guzik thread_count_sub(1); 23062dbc992SMateusz Guzik } 23162dbc992SMateusz Guzik 23262dbc992SMateusz Guzik static lwpid_t 23362dbc992SMateusz Guzik tid_alloc(void) 23462dbc992SMateusz Guzik { 23562dbc992SMateusz Guzik static lwpid_t trytid; 23662dbc992SMateusz Guzik lwpid_t tid; 23762dbc992SMateusz Guzik 23862dbc992SMateusz Guzik mtx_lock(&tid_lock); 23935bb59edSMateusz Guzik /* 24035bb59edSMateusz Guzik * It is an invariant that the bitmap is big enough to hold maxthread 24135bb59edSMateusz Guzik * IDs. If we got to this point there has to be at least one free. 
24235bb59edSMateusz Guzik */ 24335bb59edSMateusz Guzik if (trytid >= maxthread) 24435bb59edSMateusz Guzik trytid = 0; 24535bb59edSMateusz Guzik bit_ffc_at(tid_bitmap, trytid, maxthread, &tid); 24635bb59edSMateusz Guzik if (tid == -1) { 24735bb59edSMateusz Guzik KASSERT(trytid != 0, ("unexpectedly ran out of IDs")); 24835bb59edSMateusz Guzik trytid = 0; 24935bb59edSMateusz Guzik bit_ffc_at(tid_bitmap, trytid, maxthread, &tid); 25035bb59edSMateusz Guzik KASSERT(tid != -1, ("unexpectedly ran out of IDs")); 251ec6ea5e8SDavid Xu } 25235bb59edSMateusz Guzik bit_set(tid_bitmap, tid); 253934e7e5eSMateusz Guzik trytid = tid + 1; 254ec6ea5e8SDavid Xu mtx_unlock(&tid_lock); 25535bb59edSMateusz Guzik return (tid + NO_PID); 256ec6ea5e8SDavid Xu } 257ec6ea5e8SDavid Xu 258ec6ea5e8SDavid Xu static void 259755341dfSMateusz Guzik tid_free_locked(lwpid_t rtid) 260ec6ea5e8SDavid Xu { 26135bb59edSMateusz Guzik lwpid_t tid; 262ec6ea5e8SDavid Xu 263755341dfSMateusz Guzik mtx_assert(&tid_lock, MA_OWNED); 26435bb59edSMateusz Guzik KASSERT(rtid >= NO_PID, 26535bb59edSMateusz Guzik ("%s: invalid tid %d\n", __func__, rtid)); 26635bb59edSMateusz Guzik tid = rtid - NO_PID; 26735bb59edSMateusz Guzik KASSERT(bit_test(tid_bitmap, tid) != 0, 26835bb59edSMateusz Guzik ("thread ID %d not allocated\n", rtid)); 26935bb59edSMateusz Guzik bit_clear(tid_bitmap, tid); 270755341dfSMateusz Guzik } 271755341dfSMateusz Guzik 272755341dfSMateusz Guzik static void 273755341dfSMateusz Guzik tid_free(lwpid_t rtid) 274755341dfSMateusz Guzik { 275755341dfSMateusz Guzik 276755341dfSMateusz Guzik mtx_lock(&tid_lock); 277755341dfSMateusz Guzik tid_free_locked(rtid); 278755341dfSMateusz Guzik mtx_unlock(&tid_lock); 279755341dfSMateusz Guzik } 280755341dfSMateusz Guzik 281755341dfSMateusz Guzik static void 282755341dfSMateusz Guzik tid_free_batch(lwpid_t *batch, int n) 283755341dfSMateusz Guzik { 284755341dfSMateusz Guzik int i; 285755341dfSMateusz Guzik 286755341dfSMateusz Guzik mtx_lock(&tid_lock); 287755341dfSMateusz 
Guzik for (i = 0; i < n; i++) { 288755341dfSMateusz Guzik tid_free_locked(batch[i]); 289755341dfSMateusz Guzik } 290ec6ea5e8SDavid Xu mtx_unlock(&tid_lock); 291ec6ea5e8SDavid Xu } 292ec6ea5e8SDavid Xu 293fdcac928SMarcel Moolenaar /* 2945ef7b7a0SMateusz Guzik * Batching for thread reapping. 2955ef7b7a0SMateusz Guzik */ 2965ef7b7a0SMateusz Guzik struct tidbatch { 2975ef7b7a0SMateusz Guzik lwpid_t tab[16]; 2985ef7b7a0SMateusz Guzik int n; 2995ef7b7a0SMateusz Guzik }; 3005ef7b7a0SMateusz Guzik 3015ef7b7a0SMateusz Guzik static void 3025ef7b7a0SMateusz Guzik tidbatch_prep(struct tidbatch *tb) 3035ef7b7a0SMateusz Guzik { 3045ef7b7a0SMateusz Guzik 3055ef7b7a0SMateusz Guzik tb->n = 0; 3065ef7b7a0SMateusz Guzik } 3075ef7b7a0SMateusz Guzik 3085ef7b7a0SMateusz Guzik static void 3095ef7b7a0SMateusz Guzik tidbatch_add(struct tidbatch *tb, struct thread *td) 3105ef7b7a0SMateusz Guzik { 3115ef7b7a0SMateusz Guzik 3125ef7b7a0SMateusz Guzik KASSERT(tb->n < nitems(tb->tab), 3135ef7b7a0SMateusz Guzik ("%s: count too high %d", __func__, tb->n)); 3145ef7b7a0SMateusz Guzik tb->tab[tb->n] = td->td_tid; 3155ef7b7a0SMateusz Guzik tb->n++; 3165ef7b7a0SMateusz Guzik } 3175ef7b7a0SMateusz Guzik 3185ef7b7a0SMateusz Guzik static void 3195ef7b7a0SMateusz Guzik tidbatch_process(struct tidbatch *tb) 3205ef7b7a0SMateusz Guzik { 3215ef7b7a0SMateusz Guzik 3225ef7b7a0SMateusz Guzik KASSERT(tb->n <= nitems(tb->tab), 3235ef7b7a0SMateusz Guzik ("%s: count too high %d", __func__, tb->n)); 3245ef7b7a0SMateusz Guzik if (tb->n == nitems(tb->tab)) { 3255ef7b7a0SMateusz Guzik tid_free_batch(tb->tab, tb->n); 3265ef7b7a0SMateusz Guzik tb->n = 0; 3275ef7b7a0SMateusz Guzik } 3285ef7b7a0SMateusz Guzik } 3295ef7b7a0SMateusz Guzik 3305ef7b7a0SMateusz Guzik static void 3315ef7b7a0SMateusz Guzik tidbatch_final(struct tidbatch *tb) 3325ef7b7a0SMateusz Guzik { 3335ef7b7a0SMateusz Guzik 3345ef7b7a0SMateusz Guzik KASSERT(tb->n <= nitems(tb->tab), 3355ef7b7a0SMateusz Guzik ("%s: count too high %d", __func__, tb->n)); 
3365ef7b7a0SMateusz Guzik if (tb->n != 0) { 3375ef7b7a0SMateusz Guzik tid_free_batch(tb->tab, tb->n); 3385ef7b7a0SMateusz Guzik } 3395ef7b7a0SMateusz Guzik } 3405ef7b7a0SMateusz Guzik 3415ef7b7a0SMateusz Guzik /* 342696058c3SJulian Elischer * Prepare a thread for use. 34344990b8cSJulian Elischer */ 344b23f72e9SBrian Feldman static int 345b23f72e9SBrian Feldman thread_ctor(void *mem, int size, void *arg, int flags) 34644990b8cSJulian Elischer { 34744990b8cSJulian Elischer struct thread *td; 34844990b8cSJulian Elischer 34944990b8cSJulian Elischer td = (struct thread *)mem; 350fa2528acSAlex Richardson TD_SET_STATE(td, TDS_INACTIVE); 35194dd54b9SKonstantin Belousov td->td_lastcpu = td->td_oncpu = NOCPU; 3526c27c603SJuli Mallett 3536c27c603SJuli Mallett /* 3546c27c603SJuli Mallett * Note that td_critnest begins life as 1 because the thread is not 3556c27c603SJuli Mallett * running and is thereby implicitly waiting to be on the receiving 356a54e85fdSJeff Roberson * end of a context switch. 3576c27c603SJuli Mallett */ 358139b7550SJohn Baldwin td->td_critnest = 1; 359acbe332aSDavid Xu td->td_lend_user_pri = PRI_MAX; 360911b84b0SRobert Watson #ifdef AUDIT 361911b84b0SRobert Watson audit_thread_alloc(td); 362911b84b0SRobert Watson #endif 363598f2b81SMateusz Guzik #ifdef KDTRACE_HOOKS 364598f2b81SMateusz Guzik kdtrace_thread_ctor(td); 365598f2b81SMateusz Guzik #endif 366d10183d9SDavid Xu umtx_thread_alloc(td); 36719d3e47dSMateusz Guzik MPASS(td->td_sel == NULL); 368b23f72e9SBrian Feldman return (0); 36944990b8cSJulian Elischer } 37044990b8cSJulian Elischer 37144990b8cSJulian Elischer /* 37244990b8cSJulian Elischer * Reclaim a thread after use. 
37344990b8cSJulian Elischer */ 37444990b8cSJulian Elischer static void 37544990b8cSJulian Elischer thread_dtor(void *mem, int size, void *arg) 37644990b8cSJulian Elischer { 37744990b8cSJulian Elischer struct thread *td; 37844990b8cSJulian Elischer 37944990b8cSJulian Elischer td = (struct thread *)mem; 38044990b8cSJulian Elischer 38144990b8cSJulian Elischer #ifdef INVARIANTS 38244990b8cSJulian Elischer /* Verify that this thread is in a safe state to free. */ 383fa2528acSAlex Richardson switch (TD_GET_STATE(td)) { 38471fad9fdSJulian Elischer case TDS_INHIBITED: 38571fad9fdSJulian Elischer case TDS_RUNNING: 38671fad9fdSJulian Elischer case TDS_CAN_RUN: 38744990b8cSJulian Elischer case TDS_RUNQ: 38844990b8cSJulian Elischer /* 38944990b8cSJulian Elischer * We must never unlink a thread that is in one of 39044990b8cSJulian Elischer * these states, because it is currently active. 39144990b8cSJulian Elischer */ 39244990b8cSJulian Elischer panic("bad state for thread unlinking"); 39344990b8cSJulian Elischer /* NOTREACHED */ 39471fad9fdSJulian Elischer case TDS_INACTIVE: 39544990b8cSJulian Elischer break; 39644990b8cSJulian Elischer default: 39744990b8cSJulian Elischer panic("bad thread state"); 39844990b8cSJulian Elischer /* NOTREACHED */ 39944990b8cSJulian Elischer } 40044990b8cSJulian Elischer #endif 4016e8525ceSRobert Watson #ifdef AUDIT 4026e8525ceSRobert Watson audit_thread_free(td); 4036e8525ceSRobert Watson #endif 404598f2b81SMateusz Guzik #ifdef KDTRACE_HOOKS 405598f2b81SMateusz Guzik kdtrace_thread_dtor(td); 406598f2b81SMateusz Guzik #endif 4071ba4a712SPawel Jakub Dawidek /* Free all OSD associated to this thread. 
*/ 4081ba4a712SPawel Jakub Dawidek osd_thread_exit(td); 409aca4bb91SKonstantin Belousov td_softdep_cleanup(td); 410aca4bb91SKonstantin Belousov MPASS(td->td_su == NULL); 41119d3e47dSMateusz Guzik seltdfini(td); 41244990b8cSJulian Elischer } 41344990b8cSJulian Elischer 41444990b8cSJulian Elischer /* 41544990b8cSJulian Elischer * Initialize type-stable parts of a thread (when newly created). 41644990b8cSJulian Elischer */ 417b23f72e9SBrian Feldman static int 418b23f72e9SBrian Feldman thread_init(void *mem, int size, int flags) 41944990b8cSJulian Elischer { 42044990b8cSJulian Elischer struct thread *td; 42144990b8cSJulian Elischer 42244990b8cSJulian Elischer td = (struct thread *)mem; 423247aba24SMarcel Moolenaar 424b83e94beSMateusz Guzik td->td_allocdomain = vm_phys_domain(vtophys(td)); 42544f3b092SJohn Baldwin td->td_sleepqueue = sleepq_alloc(); 426961a7b24SJohn Baldwin td->td_turnstile = turnstile_alloc(); 4278f0e9130SKonstantin Belousov td->td_rlqe = NULL; 4282ca45184SMatt Joras EVENTHANDLER_DIRECT_INVOKE(thread_init, td); 429d10183d9SDavid Xu umtx_thread_init(td); 43089b57fcfSKonstantin Belousov td->td_kstack = 0; 431ad8b1d85SKonstantin Belousov td->td_sel = NULL; 432b23f72e9SBrian Feldman return (0); 43344990b8cSJulian Elischer } 43444990b8cSJulian Elischer 43544990b8cSJulian Elischer /* 43644990b8cSJulian Elischer * Tear down type-stable parts of a thread (just before being discarded). 
43744990b8cSJulian Elischer */ 43844990b8cSJulian Elischer static void 43944990b8cSJulian Elischer thread_fini(void *mem, int size) 44044990b8cSJulian Elischer { 44144990b8cSJulian Elischer struct thread *td; 44244990b8cSJulian Elischer 44344990b8cSJulian Elischer td = (struct thread *)mem; 4442ca45184SMatt Joras EVENTHANDLER_DIRECT_INVOKE(thread_fini, td); 4458f0e9130SKonstantin Belousov rlqentry_free(td->td_rlqe); 446961a7b24SJohn Baldwin turnstile_free(td->td_turnstile); 44744f3b092SJohn Baldwin sleepq_free(td->td_sleepqueue); 448d10183d9SDavid Xu umtx_thread_fini(td); 44919d3e47dSMateusz Guzik MPASS(td->td_sel == NULL); 45044990b8cSJulian Elischer } 4515215b187SJeff Roberson 4525c8329edSJulian Elischer /* 4535215b187SJeff Roberson * For a newly created process, 4545215b187SJeff Roberson * link up all the structures and its initial threads etc. 455ed062c8dSJulian Elischer * called from: 456e7d939bdSMarcel Moolenaar * {arch}/{arch}/machdep.c {arch}_init(), init386() etc. 457ed062c8dSJulian Elischer * proc_dtor() (should go away) 458ed062c8dSJulian Elischer * proc_init() 4595c8329edSJulian Elischer */ 4605c8329edSJulian Elischer void 46189b57fcfSKonstantin Belousov proc_linkup0(struct proc *p, struct thread *td) 46289b57fcfSKonstantin Belousov { 46389b57fcfSKonstantin Belousov TAILQ_INIT(&p->p_threads); /* all threads in proc */ 46489b57fcfSKonstantin Belousov proc_linkup(p, td); 46589b57fcfSKonstantin Belousov } 46689b57fcfSKonstantin Belousov 46789b57fcfSKonstantin Belousov void 4688460a577SJohn Birrell proc_linkup(struct proc *p, struct thread *td) 4695c8329edSJulian Elischer { 470a54e85fdSJeff Roberson 4719104847fSDavid Xu sigqueue_init(&p->p_sigqueue, p); 472ebceaf6dSDavid Xu p->p_ksi = ksiginfo_alloc(1); 473ebceaf6dSDavid Xu if (p->p_ksi != NULL) { 4745c474517SDavid Xu /* XXX p_ksi may be null if ksiginfo zone is not ready */ 475ebceaf6dSDavid Xu p->p_ksi->ksi_flags = KSI_EXT | KSI_INS; 476ebceaf6dSDavid Xu } 477b2f92ef9SDavid Xu LIST_INIT(&p->p_mqnotifier); 
4785c8329edSJulian Elischer p->p_numthreads = 0; 4798460a577SJohn Birrell thread_link(td, p); 4805c8329edSJulian Elischer } 4815c8329edSJulian Elischer 4821bd3cf5dSMateusz Guzik extern int max_threads_per_proc; 4831bd3cf5dSMateusz Guzik 4845c8329edSJulian Elischer /* 48544990b8cSJulian Elischer * Initialize global thread allocation resources. 48644990b8cSJulian Elischer */ 48744990b8cSJulian Elischer void 48844990b8cSJulian Elischer threadinit(void) 48944990b8cSJulian Elischer { 49026007fe3SMateusz Guzik u_long i; 491cf31cadeSMateusz Guzik lwpid_t tid0; 4925aa5420fSMark Johnston uint32_t flags; 49344990b8cSJulian Elischer 4941bd3cf5dSMateusz Guzik /* 4951bd3cf5dSMateusz Guzik * Place an upper limit on threads which can be allocated. 4961bd3cf5dSMateusz Guzik * 4971bd3cf5dSMateusz Guzik * Note that other factors may make the de facto limit much lower. 4981bd3cf5dSMateusz Guzik * 4991bd3cf5dSMateusz Guzik * Platform limits are somewhat arbitrary but deemed "more than good 5001bd3cf5dSMateusz Guzik * enough" for the foreseable future. 5011bd3cf5dSMateusz Guzik */ 5021bd3cf5dSMateusz Guzik if (maxthread == 0) { 5031bd3cf5dSMateusz Guzik #ifdef _LP64 5041bd3cf5dSMateusz Guzik maxthread = MIN(maxproc * max_threads_per_proc, 1000000); 5051bd3cf5dSMateusz Guzik #else 5061bd3cf5dSMateusz Guzik maxthread = MIN(maxproc * max_threads_per_proc, 100000); 5071bd3cf5dSMateusz Guzik #endif 5081bd3cf5dSMateusz Guzik } 5091bd3cf5dSMateusz Guzik 5101ea7a6f8SPoul-Henning Kamp mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF); 51135bb59edSMateusz Guzik tid_bitmap = bit_alloc(maxthread, M_TIDHASH, M_WAITOK); 51262dbc992SMateusz Guzik /* 51362dbc992SMateusz Guzik * Handle thread0. 
51462dbc992SMateusz Guzik */ 51562dbc992SMateusz Guzik thread_count_inc(); 516cf31cadeSMateusz Guzik tid0 = tid_alloc(); 517cf31cadeSMateusz Guzik if (tid0 != THREAD0_TID) 518cf31cadeSMateusz Guzik panic("tid0 %d != %d\n", tid0, THREAD0_TID); 5191ea7a6f8SPoul-Henning Kamp 5205aa5420fSMark Johnston flags = UMA_ZONE_NOFREE; 5215aa5420fSMark Johnston #ifdef __aarch64__ 5225aa5420fSMark Johnston /* 5235aa5420fSMark Johnston * Force thread structures to be allocated from the direct map. 5245aa5420fSMark Johnston * Otherwise, superpage promotions and demotions may temporarily 5255aa5420fSMark Johnston * invalidate thread structure mappings. For most dynamically allocated 5265aa5420fSMark Johnston * structures this is not a problem, but translation faults cannot be 5275aa5420fSMark Johnston * handled without accessing curthread. 5285aa5420fSMark Johnston */ 5295aa5420fSMark Johnston flags |= UMA_ZONE_CONTIG; 5305aa5420fSMark Johnston #endif 531de028f5aSJeff Roberson thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(), 53244990b8cSJulian Elischer thread_ctor, thread_dtor, thread_init, thread_fini, 5335aa5420fSMark Johnston 32 - 1, flags); 534cf7d9a8cSDavid Xu tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash); 53526007fe3SMateusz Guzik tidhashlock = (tidhash + 1) / 64; 53626007fe3SMateusz Guzik if (tidhashlock > 0) 53726007fe3SMateusz Guzik tidhashlock--; 53826007fe3SMateusz Guzik tidhashtbl_lock = malloc(sizeof(*tidhashtbl_lock) * (tidhashlock + 1), 53926007fe3SMateusz Guzik M_TIDHASH, M_WAITOK | M_ZERO); 54026007fe3SMateusz Guzik for (i = 0; i < tidhashlock + 1; i++) 54126007fe3SMateusz Guzik rw_init(&tidhashtbl_lock[i], "tidhash"); 542d116b9f1SMateusz Guzik 543d116b9f1SMateusz Guzik TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL); 544d116b9f1SMateusz Guzik callout_init(&thread_reap_callout, 1); 545845d7797SKonstantin Belousov callout_reset(&thread_reap_callout, 5 * hz, 546845d7797SKonstantin Belousov thread_reap_callout_cb, NULL); 
54744990b8cSJulian Elischer } 54844990b8cSJulian Elischer 54944990b8cSJulian Elischer /* 550ff8fbcffSJeff Roberson * Place an unused thread on the zombie list. 55144990b8cSJulian Elischer */ 55244990b8cSJulian Elischer void 553ff8fbcffSJeff Roberson thread_zombie(struct thread *td) 55444990b8cSJulian Elischer { 555d116b9f1SMateusz Guzik struct thread_domain_data *tdd; 556c5315f51SMateusz Guzik struct thread *ztd; 557c5315f51SMateusz Guzik 558a9568cd2SMateusz Guzik tdd = &thread_domain_data[td->td_allocdomain]; 559d116b9f1SMateusz Guzik ztd = atomic_load_ptr(&tdd->tdd_zombies); 560c5315f51SMateusz Guzik for (;;) { 561c5315f51SMateusz Guzik td->td_zombie = ztd; 562d116b9f1SMateusz Guzik if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies, 563c5315f51SMateusz Guzik (uintptr_t *)&ztd, (uintptr_t)td)) 564c5315f51SMateusz Guzik break; 565c5315f51SMateusz Guzik continue; 566c5315f51SMateusz Guzik } 56744990b8cSJulian Elischer } 56844990b8cSJulian Elischer 5695c8329edSJulian Elischer /* 570ff8fbcffSJeff Roberson * Release a thread that has exited after cpu_throw(). 571ff8fbcffSJeff Roberson */ 572ff8fbcffSJeff Roberson void 573ff8fbcffSJeff Roberson thread_stash(struct thread *td) 574ff8fbcffSJeff Roberson { 575ff8fbcffSJeff Roberson atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); 576ff8fbcffSJeff Roberson thread_zombie(td); 577ff8fbcffSJeff Roberson } 578ff8fbcffSJeff Roberson 579ff8fbcffSJeff Roberson /* 580d116b9f1SMateusz Guzik * Reap zombies from passed domain. 
58144990b8cSJulian Elischer */ 582d116b9f1SMateusz Guzik static void 583d116b9f1SMateusz Guzik thread_reap_domain(struct thread_domain_data *tdd) 58444990b8cSJulian Elischer { 585c5315f51SMateusz Guzik struct thread *itd, *ntd; 5865ef7b7a0SMateusz Guzik struct tidbatch tidbatch; 587f34a2f56SMateusz Guzik struct credbatch credbatch; 5885ef7b7a0SMateusz Guzik int tdcount; 589fb8ab680SMateusz Guzik struct plimit *lim; 590fb8ab680SMateusz Guzik int limcount; 59144990b8cSJulian Elischer 59244990b8cSJulian Elischer /* 593c5315f51SMateusz Guzik * Reading upfront is pessimal if followed by concurrent atomic_swap, 594c5315f51SMateusz Guzik * but most of the time the list is empty. 59544990b8cSJulian Elischer */ 596d116b9f1SMateusz Guzik if (tdd->tdd_zombies == NULL) 597c5315f51SMateusz Guzik return; 598c5315f51SMateusz Guzik 599d116b9f1SMateusz Guzik itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies, 600c5315f51SMateusz Guzik (uintptr_t)NULL); 6015ef7b7a0SMateusz Guzik if (itd == NULL) 6025ef7b7a0SMateusz Guzik return; 6035ef7b7a0SMateusz Guzik 604d116b9f1SMateusz Guzik /* 605d116b9f1SMateusz Guzik * Multiple CPUs can get here, the race is fine as ticks is only 606d116b9f1SMateusz Guzik * advisory. 
607d116b9f1SMateusz Guzik */ 608d116b9f1SMateusz Guzik tdd->tdd_reapticks = ticks; 609d116b9f1SMateusz Guzik 6105ef7b7a0SMateusz Guzik tidbatch_prep(&tidbatch); 611f34a2f56SMateusz Guzik credbatch_prep(&credbatch); 6125ef7b7a0SMateusz Guzik tdcount = 0; 613fb8ab680SMateusz Guzik lim = NULL; 614fb8ab680SMateusz Guzik limcount = 0; 615d116b9f1SMateusz Guzik 616c5315f51SMateusz Guzik while (itd != NULL) { 617c5315f51SMateusz Guzik ntd = itd->td_zombie; 6185ef7b7a0SMateusz Guzik EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd); 6195ef7b7a0SMateusz Guzik tidbatch_add(&tidbatch, itd); 620f34a2f56SMateusz Guzik credbatch_add(&credbatch, itd); 621fb8ab680SMateusz Guzik MPASS(itd->td_limit != NULL); 622fb8ab680SMateusz Guzik if (lim != itd->td_limit) { 623fb8ab680SMateusz Guzik if (limcount != 0) { 624fb8ab680SMateusz Guzik lim_freen(lim, limcount); 625fb8ab680SMateusz Guzik limcount = 0; 626fb8ab680SMateusz Guzik } 627fb8ab680SMateusz Guzik } 628fb8ab680SMateusz Guzik lim = itd->td_limit; 629fb8ab680SMateusz Guzik limcount++; 630755341dfSMateusz Guzik thread_free_batched(itd); 6315ef7b7a0SMateusz Guzik tidbatch_process(&tidbatch); 632f34a2f56SMateusz Guzik credbatch_process(&credbatch); 6335ef7b7a0SMateusz Guzik tdcount++; 6345ef7b7a0SMateusz Guzik if (tdcount == 32) { 6355ef7b7a0SMateusz Guzik thread_count_sub(tdcount); 6365ef7b7a0SMateusz Guzik tdcount = 0; 637755341dfSMateusz Guzik } 638c5315f51SMateusz Guzik itd = ntd; 63944990b8cSJulian Elischer } 640755341dfSMateusz Guzik 6415ef7b7a0SMateusz Guzik tidbatch_final(&tidbatch); 642f34a2f56SMateusz Guzik credbatch_final(&credbatch); 6435ef7b7a0SMateusz Guzik if (tdcount != 0) { 6445ef7b7a0SMateusz Guzik thread_count_sub(tdcount); 645755341dfSMateusz Guzik } 646fb8ab680SMateusz Guzik MPASS(limcount != 0); 647fb8ab680SMateusz Guzik lim_freen(lim, limcount); 648ed062c8dSJulian Elischer } 64944990b8cSJulian Elischer 6504f0db5e0SJulian Elischer /* 651d116b9f1SMateusz Guzik * Reap zombies from all domains. 
652d116b9f1SMateusz Guzik */ 653d116b9f1SMateusz Guzik static void 654d116b9f1SMateusz Guzik thread_reap_all(void) 655d116b9f1SMateusz Guzik { 656d116b9f1SMateusz Guzik struct thread_domain_data *tdd; 657d116b9f1SMateusz Guzik int i, domain; 658d116b9f1SMateusz Guzik 659d116b9f1SMateusz Guzik domain = PCPU_GET(domain); 660d116b9f1SMateusz Guzik for (i = 0; i < vm_ndomains; i++) { 661d116b9f1SMateusz Guzik tdd = &thread_domain_data[(i + domain) % vm_ndomains]; 662d116b9f1SMateusz Guzik thread_reap_domain(tdd); 663d116b9f1SMateusz Guzik } 664d116b9f1SMateusz Guzik } 665d116b9f1SMateusz Guzik 666d116b9f1SMateusz Guzik /* 667d116b9f1SMateusz Guzik * Reap zombies from local domain. 668d116b9f1SMateusz Guzik */ 669b83e94beSMateusz Guzik static void 670d116b9f1SMateusz Guzik thread_reap(void) 671d116b9f1SMateusz Guzik { 672d116b9f1SMateusz Guzik struct thread_domain_data *tdd; 673d116b9f1SMateusz Guzik int domain; 674d116b9f1SMateusz Guzik 675d116b9f1SMateusz Guzik domain = PCPU_GET(domain); 676d116b9f1SMateusz Guzik tdd = &thread_domain_data[domain]; 677d116b9f1SMateusz Guzik 678d116b9f1SMateusz Guzik thread_reap_domain(tdd); 679d116b9f1SMateusz Guzik } 680d116b9f1SMateusz Guzik 681d116b9f1SMateusz Guzik static void 682d116b9f1SMateusz Guzik thread_reap_task_cb(void *arg __unused, int pending __unused) 683d116b9f1SMateusz Guzik { 684d116b9f1SMateusz Guzik 685d116b9f1SMateusz Guzik thread_reap_all(); 686d116b9f1SMateusz Guzik } 687d116b9f1SMateusz Guzik 688d116b9f1SMateusz Guzik static void 689d116b9f1SMateusz Guzik thread_reap_callout_cb(void *arg __unused) 690d116b9f1SMateusz Guzik { 691d116b9f1SMateusz Guzik struct thread_domain_data *tdd; 692d116b9f1SMateusz Guzik int i, cticks, lticks; 693d116b9f1SMateusz Guzik bool wantreap; 694d116b9f1SMateusz Guzik 695d116b9f1SMateusz Guzik wantreap = false; 696d116b9f1SMateusz Guzik cticks = atomic_load_int(&ticks); 697d116b9f1SMateusz Guzik for (i = 0; i < vm_ndomains; i++) { 698d116b9f1SMateusz Guzik tdd = 
&thread_domain_data[i]; 699d116b9f1SMateusz Guzik lticks = tdd->tdd_reapticks; 700d116b9f1SMateusz Guzik if (tdd->tdd_zombies != NULL && 701d116b9f1SMateusz Guzik (u_int)(cticks - lticks) > 5 * hz) { 702d116b9f1SMateusz Guzik wantreap = true; 703d116b9f1SMateusz Guzik break; 704d116b9f1SMateusz Guzik } 705d116b9f1SMateusz Guzik } 706d116b9f1SMateusz Guzik 707d116b9f1SMateusz Guzik if (wantreap) 708d116b9f1SMateusz Guzik taskqueue_enqueue(taskqueue_thread, &thread_reap_task); 709845d7797SKonstantin Belousov callout_reset(&thread_reap_callout, 5 * hz, 710845d7797SKonstantin Belousov thread_reap_callout_cb, NULL); 711d116b9f1SMateusz Guzik } 712d116b9f1SMateusz Guzik 713d116b9f1SMateusz Guzik /* 714f62c7e54SKonstantin Belousov * Calling this function guarantees that any thread that exited before 715f62c7e54SKonstantin Belousov * the call is reaped when the function returns. By 'exited' we mean 716f62c7e54SKonstantin Belousov * a thread removed from the process linkage with thread_unlink(). 717f62c7e54SKonstantin Belousov * Practically this means that caller must lock/unlock corresponding 718f62c7e54SKonstantin Belousov * process lock before the call, to synchronize with thread_exit(). 719f62c7e54SKonstantin Belousov */ 720f62c7e54SKonstantin Belousov void 721f62c7e54SKonstantin Belousov thread_reap_barrier(void) 722f62c7e54SKonstantin Belousov { 723f62c7e54SKonstantin Belousov struct task *t; 724f62c7e54SKonstantin Belousov 725f62c7e54SKonstantin Belousov /* 726f62c7e54SKonstantin Belousov * First do context switches to each CPU to ensure that all 727f62c7e54SKonstantin Belousov * PCPU pc_deadthreads are moved to zombie list. 728f62c7e54SKonstantin Belousov */ 729f62c7e54SKonstantin Belousov quiesce_all_cpus("", PDROP); 730f62c7e54SKonstantin Belousov 731f62c7e54SKonstantin Belousov /* 732f62c7e54SKonstantin Belousov * Second, fire the task in the same thread as normal 733f62c7e54SKonstantin Belousov * thread_reap() is done, to serialize reaping. 
734f62c7e54SKonstantin Belousov */ 735f62c7e54SKonstantin Belousov t = malloc(sizeof(*t), M_TEMP, M_WAITOK); 736f62c7e54SKonstantin Belousov TASK_INIT(t, 0, thread_reap_task_cb, t); 737f62c7e54SKonstantin Belousov taskqueue_enqueue(taskqueue_thread, t); 738f62c7e54SKonstantin Belousov taskqueue_drain(taskqueue_thread, t); 739f62c7e54SKonstantin Belousov free(t, M_TEMP); 740f62c7e54SKonstantin Belousov } 741f62c7e54SKonstantin Belousov 742f62c7e54SKonstantin Belousov /* 74344990b8cSJulian Elischer * Allocate a thread. 74444990b8cSJulian Elischer */ 74544990b8cSJulian Elischer struct thread * 7468a945d10SKonstantin Belousov thread_alloc(int pages) 74744990b8cSJulian Elischer { 74889b57fcfSKonstantin Belousov struct thread *td; 7491bd3cf5dSMateusz Guzik lwpid_t tid; 7508460a577SJohn Birrell 75162dbc992SMateusz Guzik if (!thread_count_inc()) { 7521bd3cf5dSMateusz Guzik return (NULL); 7531bd3cf5dSMateusz Guzik } 7541bd3cf5dSMateusz Guzik 75562dbc992SMateusz Guzik tid = tid_alloc(); 7561bd3cf5dSMateusz Guzik td = uma_zalloc(thread_zone, M_WAITOK); 75789b57fcfSKonstantin Belousov KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack")); 7588a945d10SKonstantin Belousov if (!vm_thread_new(td, pages)) { 75989b57fcfSKonstantin Belousov uma_zfree(thread_zone, td); 7601bd3cf5dSMateusz Guzik tid_free(tid); 76162dbc992SMateusz Guzik thread_count_dec(); 76289b57fcfSKonstantin Belousov return (NULL); 76389b57fcfSKonstantin Belousov } 7641bd3cf5dSMateusz Guzik td->td_tid = tid; 765f575573cSKonstantin Belousov bzero(&td->td_sa.args, sizeof(td->td_sa.args)); 7665dda15adSMark Johnston kmsan_thread_alloc(td); 7670c3967e7SMarcel Moolenaar cpu_thread_alloc(td); 7681bd3cf5dSMateusz Guzik EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); 76989b57fcfSKonstantin Belousov return (td); 77044990b8cSJulian Elischer } 77144990b8cSJulian Elischer 7728a945d10SKonstantin Belousov int 7738a945d10SKonstantin Belousov thread_alloc_stack(struct thread *td, int pages) 7748a945d10SKonstantin 
Belousov { 7758a945d10SKonstantin Belousov 7768a945d10SKonstantin Belousov KASSERT(td->td_kstack == 0, 7778a945d10SKonstantin Belousov ("thread_alloc_stack called on a thread with kstack")); 7788a945d10SKonstantin Belousov if (!vm_thread_new(td, pages)) 7798a945d10SKonstantin Belousov return (0); 7808a945d10SKonstantin Belousov cpu_thread_alloc(td); 7818a945d10SKonstantin Belousov return (1); 7828a945d10SKonstantin Belousov } 7834f0db5e0SJulian Elischer 7844f0db5e0SJulian Elischer /* 78544990b8cSJulian Elischer * Deallocate a thread. 78644990b8cSJulian Elischer */ 787755341dfSMateusz Guzik static void 788755341dfSMateusz Guzik thread_free_batched(struct thread *td) 78944990b8cSJulian Elischer { 7902e6b8de4SJeff Roberson 7912e6b8de4SJeff Roberson lock_profile_thread_exit(td); 79245aea8deSJeff Roberson if (td->td_cpuset) 793d7f687fcSJeff Roberson cpuset_rel(td->td_cpuset); 794d7f687fcSJeff Roberson td->td_cpuset = NULL; 7950c3967e7SMarcel Moolenaar cpu_thread_free(td); 79689b57fcfSKonstantin Belousov if (td->td_kstack != 0) 79789b57fcfSKonstantin Belousov vm_thread_dispose(td); 7982d19b736SKonstantin Belousov callout_drain(&td->td_slpcallout); 799755341dfSMateusz Guzik /* 800755341dfSMateusz Guzik * Freeing handled by the caller. 
801755341dfSMateusz Guzik */ 8021bd3cf5dSMateusz Guzik td->td_tid = -1; 8035dda15adSMark Johnston kmsan_thread_free(td); 80444990b8cSJulian Elischer uma_zfree(thread_zone, td); 80544990b8cSJulian Elischer } 80644990b8cSJulian Elischer 8074ea6a9a2SMateusz Guzik void 808755341dfSMateusz Guzik thread_free(struct thread *td) 809755341dfSMateusz Guzik { 810755341dfSMateusz Guzik lwpid_t tid; 811755341dfSMateusz Guzik 8125ef7b7a0SMateusz Guzik EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td); 813755341dfSMateusz Guzik tid = td->td_tid; 814755341dfSMateusz Guzik thread_free_batched(td); 815755341dfSMateusz Guzik tid_free(tid); 81662dbc992SMateusz Guzik thread_count_dec(); 817755341dfSMateusz Guzik } 818755341dfSMateusz Guzik 819755341dfSMateusz Guzik void 8204ea6a9a2SMateusz Guzik thread_cow_get_proc(struct thread *newtd, struct proc *p) 8214ea6a9a2SMateusz Guzik { 8224ea6a9a2SMateusz Guzik 8234ea6a9a2SMateusz Guzik PROC_LOCK_ASSERT(p, MA_OWNED); 8241724c563SMateusz Guzik newtd->td_realucred = crcowget(p->p_ucred); 8251724c563SMateusz Guzik newtd->td_ucred = newtd->td_realucred; 826f6f6d240SMateusz Guzik newtd->td_limit = lim_hold(p->p_limit); 8274ea6a9a2SMateusz Guzik newtd->td_cowgen = p->p_cowgen; 8284ea6a9a2SMateusz Guzik } 8294ea6a9a2SMateusz Guzik 8304ea6a9a2SMateusz Guzik void 8314ea6a9a2SMateusz Guzik thread_cow_get(struct thread *newtd, struct thread *td) 8324ea6a9a2SMateusz Guzik { 8334ea6a9a2SMateusz Guzik 8341724c563SMateusz Guzik MPASS(td->td_realucred == td->td_ucred); 8351724c563SMateusz Guzik newtd->td_realucred = crcowget(td->td_realucred); 8361724c563SMateusz Guzik newtd->td_ucred = newtd->td_realucred; 837f6f6d240SMateusz Guzik newtd->td_limit = lim_hold(td->td_limit); 8384ea6a9a2SMateusz Guzik newtd->td_cowgen = td->td_cowgen; 8394ea6a9a2SMateusz Guzik } 8404ea6a9a2SMateusz Guzik 8414ea6a9a2SMateusz Guzik void 8424ea6a9a2SMateusz Guzik thread_cow_free(struct thread *td) 8434ea6a9a2SMateusz Guzik { 8444ea6a9a2SMateusz Guzik 8451724c563SMateusz Guzik if 
(td->td_realucred != NULL) 8461724c563SMateusz Guzik crcowfree(td); 847cd672ca6SMateusz Guzik if (td->td_limit != NULL) 848f6f6d240SMateusz Guzik lim_free(td->td_limit); 8494ea6a9a2SMateusz Guzik } 8504ea6a9a2SMateusz Guzik 8514ea6a9a2SMateusz Guzik void 8524ea6a9a2SMateusz Guzik thread_cow_update(struct thread *td) 8534ea6a9a2SMateusz Guzik { 8544ea6a9a2SMateusz Guzik struct proc *p; 855cd672ca6SMateusz Guzik struct ucred *oldcred; 856cd672ca6SMateusz Guzik struct plimit *oldlimit; 8574ea6a9a2SMateusz Guzik 8584ea6a9a2SMateusz Guzik p = td->td_proc; 859cd672ca6SMateusz Guzik oldlimit = NULL; 8604ea6a9a2SMateusz Guzik PROC_LOCK(p); 8611724c563SMateusz Guzik oldcred = crcowsync(); 8628a0cb04dSMateusz Guzik oldlimit = lim_cowsync(); 8634ea6a9a2SMateusz Guzik td->td_cowgen = p->p_cowgen; 8644ea6a9a2SMateusz Guzik PROC_UNLOCK(p); 865cd672ca6SMateusz Guzik if (oldcred != NULL) 866cd672ca6SMateusz Guzik crfree(oldcred); 867cd672ca6SMateusz Guzik if (oldlimit != NULL) 868cd672ca6SMateusz Guzik lim_free(oldlimit); 8694ea6a9a2SMateusz Guzik } 8704ea6a9a2SMateusz Guzik 871*32114b63SMateusz Guzik void 872*32114b63SMateusz Guzik thread_cow_synced(struct thread *td) 873*32114b63SMateusz Guzik { 874*32114b63SMateusz Guzik struct proc *p; 875*32114b63SMateusz Guzik 876*32114b63SMateusz Guzik p = td->td_proc; 877*32114b63SMateusz Guzik PROC_LOCK_ASSERT(p, MA_OWNED); 878*32114b63SMateusz Guzik MPASS(td->td_cowgen != p->p_cowgen); 879*32114b63SMateusz Guzik MPASS(td->td_ucred == p->p_ucred); 880*32114b63SMateusz Guzik MPASS(td->td_limit == p->p_limit); 881*32114b63SMateusz Guzik td->td_cowgen = p->p_cowgen; 882*32114b63SMateusz Guzik } 883*32114b63SMateusz Guzik 88444990b8cSJulian Elischer /* 88544990b8cSJulian Elischer * Discard the current thread and exit from its context. 88694e0a4cdSJulian Elischer * Always called with scheduler locked. 
88744990b8cSJulian Elischer * 88844990b8cSJulian Elischer * Because we can't free a thread while we're operating under its context, 889696058c3SJulian Elischer * push the current thread into our CPU's deadthread holder. This means 890696058c3SJulian Elischer * we needn't worry about someone else grabbing our context before we 8916617724cSJeff Roberson * do a cpu_throw(). 89244990b8cSJulian Elischer */ 89344990b8cSJulian Elischer void 89444990b8cSJulian Elischer thread_exit(void) 89544990b8cSJulian Elischer { 8967e3a96eaSJohn Baldwin uint64_t runtime, new_switchtime; 89744990b8cSJulian Elischer struct thread *td; 8981c4bcd05SJeff Roberson struct thread *td2; 89944990b8cSJulian Elischer struct proc *p; 9007847a9daSJohn Baldwin int wakeup_swapper; 90144990b8cSJulian Elischer 90244990b8cSJulian Elischer td = curthread; 90344990b8cSJulian Elischer p = td->td_proc; 90444990b8cSJulian Elischer 905a54e85fdSJeff Roberson PROC_SLOCK_ASSERT(p, MA_OWNED); 906ed062c8dSJulian Elischer mtx_assert(&Giant, MA_NOTOWNED); 907a54e85fdSJeff Roberson 90844990b8cSJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 909ed062c8dSJulian Elischer KASSERT(p != NULL, ("thread exiting without a process")); 910cc701b73SRobert Watson CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, 911e01eafefSJulian Elischer (long)p->p_pid, td->td_name); 9126c9271a9SAndriy Gapon SDT_PROBE0(proc, , , lwp__exit); 9139104847fSDavid Xu KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending")); 914936c24faSMateusz Guzik MPASS(td->td_realucred == td->td_ucred); 91544990b8cSJulian Elischer 916ed062c8dSJulian Elischer /* 917ed062c8dSJulian Elischer * drop FPU & debug register state storage, or any other 918ed062c8dSJulian Elischer * architecture specific resources that 919ed062c8dSJulian Elischer * would not be on a new untouched process. 
920ed062c8dSJulian Elischer */ 921bd07998eSKonstantin Belousov cpu_thread_exit(td); 92244990b8cSJulian Elischer 923ed062c8dSJulian Elischer /* 9241faf202eSJulian Elischer * The last thread is left attached to the process 9251faf202eSJulian Elischer * So that the whole bundle gets recycled. Skip 926ed062c8dSJulian Elischer * all this stuff if we never had threads. 927ed062c8dSJulian Elischer * EXIT clears all sign of other threads when 928ed062c8dSJulian Elischer * it goes to single threading, so the last thread always 929ed062c8dSJulian Elischer * takes the short path. 9301faf202eSJulian Elischer */ 931ed062c8dSJulian Elischer if (p->p_flag & P_HADTHREADS) { 9321faf202eSJulian Elischer if (p->p_numthreads > 1) { 933fd229b5bSKonstantin Belousov atomic_add_int(&td->td_proc->p_exitthreads, 1); 934d3a0bd78SJulian Elischer thread_unlink(td); 9351c4bcd05SJeff Roberson td2 = FIRST_THREAD_IN_PROC(p); 9361c4bcd05SJeff Roberson sched_exit_thread(td2, td); 937ed062c8dSJulian Elischer 938ed062c8dSJulian Elischer /* 93944990b8cSJulian Elischer * The test below is NOT true if we are the 9409182554aSKonstantin Belousov * sole exiting thread. P_STOPPED_SINGLE is unset 94144990b8cSJulian Elischer * in exit1() after it is the only survivor. 94244990b8cSJulian Elischer */ 9431279572aSDavid Xu if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 94444990b8cSJulian Elischer if (p->p_numthreads == p->p_suspcount) { 945a54e85fdSJeff Roberson thread_lock(p->p_singlethread); 9467847a9daSJohn Baldwin wakeup_swapper = thread_unsuspend_one( 94784cdea97SKonstantin Belousov p->p_singlethread, p, false); 9487847a9daSJohn Baldwin if (wakeup_swapper) 9497847a9daSJohn Baldwin kick_proc0(); 95044990b8cSJulian Elischer } 95144990b8cSJulian Elischer } 95248bfcdddSJulian Elischer 953696058c3SJulian Elischer PCPU_SET(deadthread, td); 9541faf202eSJulian Elischer } else { 955ed062c8dSJulian Elischer /* 956ed062c8dSJulian Elischer * The last thread is exiting.. 
but not through exit() 957ed062c8dSJulian Elischer */ 958ed062c8dSJulian Elischer panic ("thread_exit: Last thread exiting on its own"); 959ed062c8dSJulian Elischer } 9601faf202eSJulian Elischer } 96116d95d4fSJoseph Koshy #ifdef HWPMC_HOOKS 96216d95d4fSJoseph Koshy /* 96316d95d4fSJoseph Koshy * If this thread is part of a process that is being tracked by hwpmc(4), 96416d95d4fSJoseph Koshy * inform the module of the thread's impending exit. 96516d95d4fSJoseph Koshy */ 9666161b98cSMatt Macy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) { 96716d95d4fSJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 9686161b98cSMatt Macy PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL); 969ebfaf69cSMatt Macy } else if (PMC_SYSTEM_SAMPLING_ACTIVE()) 970ebfaf69cSMatt Macy PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL); 97116d95d4fSJoseph Koshy #endif 972a54e85fdSJeff Roberson PROC_UNLOCK(p); 9735c7bebf9SKonstantin Belousov PROC_STATLOCK(p); 9745c7bebf9SKonstantin Belousov thread_lock(td); 9755c7bebf9SKonstantin Belousov PROC_SUNLOCK(p); 9767e3a96eaSJohn Baldwin 9777e3a96eaSJohn Baldwin /* Do the same timestamp bookkeeping that mi_switch() would do. */ 9787e3a96eaSJohn Baldwin new_switchtime = cpu_ticks(); 9797e3a96eaSJohn Baldwin runtime = new_switchtime - PCPU_GET(switchtime); 9807e3a96eaSJohn Baldwin td->td_runtime += runtime; 9817e3a96eaSJohn Baldwin td->td_incruntime += runtime; 9827e3a96eaSJohn Baldwin PCPU_SET(switchtime, new_switchtime); 9837e3a96eaSJohn Baldwin PCPU_SET(switchticks, ticks); 98483c9dea1SGleb Smirnoff VM_CNT_INC(v_swtch); 9857e3a96eaSJohn Baldwin 9867e3a96eaSJohn Baldwin /* Save our resource usage in our process. 
*/ 9877e3a96eaSJohn Baldwin td->td_ru.ru_nvcsw++; 98861a74c5cSJeff Roberson ruxagg_locked(p, td); 9897e3a96eaSJohn Baldwin rucollect(&p->p_ru, &td->td_ru); 9905c7bebf9SKonstantin Belousov PROC_STATUNLOCK(p); 9917e3a96eaSJohn Baldwin 992fa2528acSAlex Richardson TD_SET_STATE(td, TDS_INACTIVE); 9933d06b4b3SAttilio Rao #ifdef WITNESS 9943d06b4b3SAttilio Rao witness_thread_exit(td); 9953d06b4b3SAttilio Rao #endif 996732d9528SJulian Elischer CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td); 997a54e85fdSJeff Roberson sched_throw(td); 998cc66ebe2SPeter Wemm panic("I'm a teapot!"); 99944990b8cSJulian Elischer /* NOTREACHED */ 100044990b8cSJulian Elischer } 100144990b8cSJulian Elischer 100244990b8cSJulian Elischer /* 1003696058c3SJulian Elischer * Do any thread specific cleanups that may be needed in wait() 100437814395SPeter Wemm * called with Giant, proc and schedlock not held. 1005696058c3SJulian Elischer */ 1006696058c3SJulian Elischer void 1007696058c3SJulian Elischer thread_wait(struct proc *p) 1008696058c3SJulian Elischer { 1009696058c3SJulian Elischer struct thread *td; 1010696058c3SJulian Elischer 101137814395SPeter Wemm mtx_assert(&Giant, MA_NOTOWNED); 1012624bf9e1SKonstantin Belousov KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()")); 1013624bf9e1SKonstantin Belousov KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking")); 1014ff8fbcffSJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1015ff8fbcffSJeff Roberson /* Lock the last thread so we spin until it exits cpu_throw(). 
*/ 1016ff8fbcffSJeff Roberson thread_lock(td); 1017ff8fbcffSJeff Roberson thread_unlock(td); 10182e6b8de4SJeff Roberson lock_profile_thread_exit(td); 1019d7f687fcSJeff Roberson cpuset_rel(td->td_cpuset); 1020d7f687fcSJeff Roberson td->td_cpuset = NULL; 1021696058c3SJulian Elischer cpu_thread_clean(td); 10224ea6a9a2SMateusz Guzik thread_cow_free(td); 10232d19b736SKonstantin Belousov callout_drain(&td->td_slpcallout); 1024696058c3SJulian Elischer thread_reap(); /* check for zombie threads etc. */ 1025696058c3SJulian Elischer } 1026696058c3SJulian Elischer 1027696058c3SJulian Elischer /* 102844990b8cSJulian Elischer * Link a thread to a process. 10291faf202eSJulian Elischer * set up anything that needs to be initialized for it to 10301faf202eSJulian Elischer * be used by the process. 103144990b8cSJulian Elischer */ 103244990b8cSJulian Elischer void 10338460a577SJohn Birrell thread_link(struct thread *td, struct proc *p) 103444990b8cSJulian Elischer { 103544990b8cSJulian Elischer 1036a54e85fdSJeff Roberson /* 1037a54e85fdSJeff Roberson * XXX This can't be enabled because it's called for proc0 before 1038374ae2a3SJeff Roberson * its lock has been created. 
1039374ae2a3SJeff Roberson * PROC_LOCK_ASSERT(p, MA_OWNED); 1040a54e85fdSJeff Roberson */ 1041fa2528acSAlex Richardson TD_SET_STATE(td, TDS_INACTIVE); 104244990b8cSJulian Elischer td->td_proc = p; 1043b61ce5b0SJeff Roberson td->td_flags = TDF_INMEM; 104444990b8cSJulian Elischer 10451faf202eSJulian Elischer LIST_INIT(&td->td_contested); 1046eea4f254SJeff Roberson LIST_INIT(&td->td_lprof[0]); 1047eea4f254SJeff Roberson LIST_INIT(&td->td_lprof[1]); 1048f6eccf96SGleb Smirnoff #ifdef EPOCH_TRACE 1049dd902d01SGleb Smirnoff SLIST_INIT(&td->td_epochs); 1050f6eccf96SGleb Smirnoff #endif 10519104847fSDavid Xu sigqueue_init(&td->td_sigqueue, p); 1052fd90e2edSJung-uk Kim callout_init(&td->td_slpcallout, 1); 105366d8df9dSDaniel Eischen TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); 105444990b8cSJulian Elischer p->p_numthreads++; 105544990b8cSJulian Elischer } 105644990b8cSJulian Elischer 1057ed062c8dSJulian Elischer /* 1058ed062c8dSJulian Elischer * Called from: 1059ed062c8dSJulian Elischer * thread_exit() 1060ed062c8dSJulian Elischer */ 1061d3a0bd78SJulian Elischer void 1062d3a0bd78SJulian Elischer thread_unlink(struct thread *td) 1063d3a0bd78SJulian Elischer { 1064d3a0bd78SJulian Elischer struct proc *p = td->td_proc; 1065d3a0bd78SJulian Elischer 1066374ae2a3SJeff Roberson PROC_LOCK_ASSERT(p, MA_OWNED); 1067f6eccf96SGleb Smirnoff #ifdef EPOCH_TRACE 1068dd902d01SGleb Smirnoff MPASS(SLIST_EMPTY(&td->td_epochs)); 1069f6eccf96SGleb Smirnoff #endif 1070dd902d01SGleb Smirnoff 1071d3a0bd78SJulian Elischer TAILQ_REMOVE(&p->p_threads, td, td_plist); 1072d3a0bd78SJulian Elischer p->p_numthreads--; 1073d3a0bd78SJulian Elischer /* could clear a few other things here */ 10748460a577SJohn Birrell /* Must NOT clear links to proc! 
*/ 10755c8329edSJulian Elischer } 10765c8329edSJulian Elischer 107779799053SKonstantin Belousov static int 107879799053SKonstantin Belousov calc_remaining(struct proc *p, int mode) 107979799053SKonstantin Belousov { 108079799053SKonstantin Belousov int remaining; 108179799053SKonstantin Belousov 10827b519077SKonstantin Belousov PROC_LOCK_ASSERT(p, MA_OWNED); 10837b519077SKonstantin Belousov PROC_SLOCK_ASSERT(p, MA_OWNED); 108479799053SKonstantin Belousov if (mode == SINGLE_EXIT) 108579799053SKonstantin Belousov remaining = p->p_numthreads; 108679799053SKonstantin Belousov else if (mode == SINGLE_BOUNDARY) 108779799053SKonstantin Belousov remaining = p->p_numthreads - p->p_boundary_count; 10886ddcc233SKonstantin Belousov else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC) 108979799053SKonstantin Belousov remaining = p->p_numthreads - p->p_suspcount; 109079799053SKonstantin Belousov else 109179799053SKonstantin Belousov panic("calc_remaining: wrong mode %d", mode); 109279799053SKonstantin Belousov return (remaining); 109379799053SKonstantin Belousov } 109479799053SKonstantin Belousov 109507a9368aSKonstantin Belousov static int 109607a9368aSKonstantin Belousov remain_for_mode(int mode) 109707a9368aSKonstantin Belousov { 109807a9368aSKonstantin Belousov 10996ddcc233SKonstantin Belousov return (mode == SINGLE_ALLPROC ? 
0 : 1); 110007a9368aSKonstantin Belousov } 110107a9368aSKonstantin Belousov 110207a9368aSKonstantin Belousov static int 110307a9368aSKonstantin Belousov weed_inhib(int mode, struct thread *td2, struct proc *p) 110407a9368aSKonstantin Belousov { 110507a9368aSKonstantin Belousov int wakeup_swapper; 110607a9368aSKonstantin Belousov 110707a9368aSKonstantin Belousov PROC_LOCK_ASSERT(p, MA_OWNED); 110807a9368aSKonstantin Belousov PROC_SLOCK_ASSERT(p, MA_OWNED); 110907a9368aSKonstantin Belousov THREAD_LOCK_ASSERT(td2, MA_OWNED); 111007a9368aSKonstantin Belousov 111107a9368aSKonstantin Belousov wakeup_swapper = 0; 111261a74c5cSJeff Roberson 111361a74c5cSJeff Roberson /* 111461a74c5cSJeff Roberson * Since the thread lock is dropped by the scheduler we have 111561a74c5cSJeff Roberson * to retry to check for races. 111661a74c5cSJeff Roberson */ 111761a74c5cSJeff Roberson restart: 111807a9368aSKonstantin Belousov switch (mode) { 111907a9368aSKonstantin Belousov case SINGLE_EXIT: 112061a74c5cSJeff Roberson if (TD_IS_SUSPENDED(td2)) { 112184cdea97SKonstantin Belousov wakeup_swapper |= thread_unsuspend_one(td2, p, true); 112261a74c5cSJeff Roberson thread_lock(td2); 112361a74c5cSJeff Roberson goto restart; 112461a74c5cSJeff Roberson } 112561a74c5cSJeff Roberson if (TD_CAN_ABORT(td2)) { 112607a9368aSKonstantin Belousov wakeup_swapper |= sleepq_abort(td2, EINTR); 112761a74c5cSJeff Roberson return (wakeup_swapper); 112861a74c5cSJeff Roberson } 112907a9368aSKonstantin Belousov break; 113007a9368aSKonstantin Belousov case SINGLE_BOUNDARY: 113107a9368aSKonstantin Belousov case SINGLE_NO_EXIT: 113261a74c5cSJeff Roberson if (TD_IS_SUSPENDED(td2) && 113361a74c5cSJeff Roberson (td2->td_flags & TDF_BOUNDARY) == 0) { 113484cdea97SKonstantin Belousov wakeup_swapper |= thread_unsuspend_one(td2, p, false); 113561a74c5cSJeff Roberson thread_lock(td2); 113661a74c5cSJeff Roberson goto restart; 113761a74c5cSJeff Roberson } 113861a74c5cSJeff Roberson if (TD_CAN_ABORT(td2)) { 113907a9368aSKonstantin 
Belousov wakeup_swapper |= sleepq_abort(td2, ERESTART); 114061a74c5cSJeff Roberson return (wakeup_swapper); 114161a74c5cSJeff Roberson } 1142917dd390SKonstantin Belousov break; 11436ddcc233SKonstantin Belousov case SINGLE_ALLPROC: 11446ddcc233SKonstantin Belousov /* 11456ddcc233SKonstantin Belousov * ALLPROC suspend tries to avoid spurious EINTR for 11466ddcc233SKonstantin Belousov * threads sleeping interruptable, by suspending the 11476ddcc233SKonstantin Belousov * thread directly, similarly to sig_suspend_threads(). 11486ddcc233SKonstantin Belousov * Since such sleep is not performed at the user 11496ddcc233SKonstantin Belousov * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP 11506ddcc233SKonstantin Belousov * is used to avoid immediate un-suspend. 11516ddcc233SKonstantin Belousov */ 11526ddcc233SKonstantin Belousov if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY | 115361a74c5cSJeff Roberson TDF_ALLPROCSUSP)) == 0) { 115484cdea97SKonstantin Belousov wakeup_swapper |= thread_unsuspend_one(td2, p, false); 115561a74c5cSJeff Roberson thread_lock(td2); 115661a74c5cSJeff Roberson goto restart; 115761a74c5cSJeff Roberson } 115861a74c5cSJeff Roberson if (TD_CAN_ABORT(td2)) { 11596ddcc233SKonstantin Belousov if ((td2->td_flags & TDF_SBDRY) == 0) { 11606ddcc233SKonstantin Belousov thread_suspend_one(td2); 11616ddcc233SKonstantin Belousov td2->td_flags |= TDF_ALLPROCSUSP; 11626ddcc233SKonstantin Belousov } else { 11636ddcc233SKonstantin Belousov wakeup_swapper |= sleepq_abort(td2, ERESTART); 116461a74c5cSJeff Roberson return (wakeup_swapper); 11656ddcc233SKonstantin Belousov } 11666ddcc233SKonstantin Belousov } 116707a9368aSKonstantin Belousov break; 116861a74c5cSJeff Roberson default: 116961a74c5cSJeff Roberson break; 117007a9368aSKonstantin Belousov } 117161a74c5cSJeff Roberson thread_unlock(td2); 117207a9368aSKonstantin Belousov return (wakeup_swapper); 117307a9368aSKonstantin Belousov } 117407a9368aSKonstantin Belousov 11755215b187SJeff Roberson 
/* 117644990b8cSJulian Elischer * Enforce single-threading. 117744990b8cSJulian Elischer * 117844990b8cSJulian Elischer * Returns 1 if the caller must abort (another thread is waiting to 117944990b8cSJulian Elischer * exit the process or similar). Process is locked! 118044990b8cSJulian Elischer * Returns 0 when you are successfully the only thread running. 118144990b8cSJulian Elischer * A process has successfully single threaded in the suspend mode when 118244990b8cSJulian Elischer * There are no threads in user mode. Threads in the kernel must be 118344990b8cSJulian Elischer * allowed to continue until they get to the user boundary. They may even 118444990b8cSJulian Elischer * copy out their return values and data before suspending. They may however be 1185e2668f55SMaxim Konovalov * accelerated in reaching the user boundary as we will wake up 118644990b8cSJulian Elischer * any sleeping threads that are interruptable. (PCATCH). 118744990b8cSJulian Elischer */ 118844990b8cSJulian Elischer int 11896ddcc233SKonstantin Belousov thread_single(struct proc *p, int mode) 119044990b8cSJulian Elischer { 119144990b8cSJulian Elischer struct thread *td; 119244990b8cSJulian Elischer struct thread *td2; 1193da7bbd2cSJohn Baldwin int remaining, wakeup_swapper; 119444990b8cSJulian Elischer 119544990b8cSJulian Elischer td = curthread; 11966ddcc233SKonstantin Belousov KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || 11976ddcc233SKonstantin Belousov mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, 11986ddcc233SKonstantin Belousov ("invalid mode %d", mode)); 11996ddcc233SKonstantin Belousov /* 12006ddcc233SKonstantin Belousov * If allowing non-ALLPROC singlethreading for non-curproc 12016ddcc233SKonstantin Belousov * callers, calc_remaining() and remain_for_mode() should be 12026ddcc233SKonstantin Belousov * adjusted to also account for td->td_proc != p. For now 12036ddcc233SKonstantin Belousov * this is not implemented because it is not used. 
12046ddcc233SKonstantin Belousov */ 12056ddcc233SKonstantin Belousov KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) || 12066ddcc233SKonstantin Belousov (mode != SINGLE_ALLPROC && td->td_proc == p), 12076ddcc233SKonstantin Belousov ("mode %d proc %p curproc %p", mode, p, td->td_proc)); 120837814395SPeter Wemm mtx_assert(&Giant, MA_NOTOWNED); 120944990b8cSJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 121044990b8cSJulian Elischer 12116ddcc233SKonstantin Belousov if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC) 121244990b8cSJulian Elischer return (0); 121344990b8cSJulian Elischer 1214e3b9bf71SJulian Elischer /* Is someone already single threading? */ 1215906ac69dSDavid Xu if (p->p_singlethread != NULL && p->p_singlethread != td) 121644990b8cSJulian Elischer return (1); 121744990b8cSJulian Elischer 1218906ac69dSDavid Xu if (mode == SINGLE_EXIT) { 1219906ac69dSDavid Xu p->p_flag |= P_SINGLE_EXIT; 1220906ac69dSDavid Xu p->p_flag &= ~P_SINGLE_BOUNDARY; 1221906ac69dSDavid Xu } else { 1222906ac69dSDavid Xu p->p_flag &= ~P_SINGLE_EXIT; 1223906ac69dSDavid Xu if (mode == SINGLE_BOUNDARY) 1224906ac69dSDavid Xu p->p_flag |= P_SINGLE_BOUNDARY; 1225906ac69dSDavid Xu else 1226906ac69dSDavid Xu p->p_flag &= ~P_SINGLE_BOUNDARY; 1227906ac69dSDavid Xu } 12286ddcc233SKonstantin Belousov if (mode == SINGLE_ALLPROC) 12296ddcc233SKonstantin Belousov p->p_flag |= P_TOTAL_STOP; 12301279572aSDavid Xu p->p_flag |= P_STOPPED_SINGLE; 12317b4a950aSDavid Xu PROC_SLOCK(p); 1232112afcb2SJohn Baldwin p->p_singlethread = td; 123379799053SKonstantin Belousov remaining = calc_remaining(p, mode); 123407a9368aSKonstantin Belousov while (remaining != remain_for_mode(mode)) { 1235bf1a3220SDavid Xu if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE) 1236bf1a3220SDavid Xu goto stopme; 1237da7bbd2cSJohn Baldwin wakeup_swapper = 0; 123844990b8cSJulian Elischer FOREACH_THREAD_IN_PROC(p, td2) { 123944990b8cSJulian Elischer if (td2 == td) 124044990b8cSJulian Elischer continue; 1241a54e85fdSJeff Roberson 
thread_lock(td2); 1242b7edba77SJeff Roberson td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; 12436ddcc233SKonstantin Belousov if (TD_IS_INHIBITED(td2)) { 124407a9368aSKonstantin Belousov wakeup_swapper |= weed_inhib(mode, td2, p); 1245d8267df7SDavid Xu #ifdef SMP 12466ddcc233SKonstantin Belousov } else if (TD_IS_RUNNING(td2) && td != td2) { 1247d8267df7SDavid Xu forward_signal(td2); 124861a74c5cSJeff Roberson thread_unlock(td2); 1249d8267df7SDavid Xu #endif 125061a74c5cSJeff Roberson } else 1251a54e85fdSJeff Roberson thread_unlock(td2); 12529d102777SJulian Elischer } 1253da7bbd2cSJohn Baldwin if (wakeup_swapper) 1254da7bbd2cSJohn Baldwin kick_proc0(); 125579799053SKonstantin Belousov remaining = calc_remaining(p, mode); 1256ec008e96SDavid Xu 12579d102777SJulian Elischer /* 12589d102777SJulian Elischer * Maybe we suspended some threads.. was it enough? 12599d102777SJulian Elischer */ 126007a9368aSKonstantin Belousov if (remaining == remain_for_mode(mode)) 12619d102777SJulian Elischer break; 12629d102777SJulian Elischer 1263bf1a3220SDavid Xu stopme: 126444990b8cSJulian Elischer /* 126544990b8cSJulian Elischer * Wake us up when everyone else has suspended. 1266e3b9bf71SJulian Elischer * In the mean time we suspend as well. 126744990b8cSJulian Elischer */ 12686ddcc233SKonstantin Belousov thread_suspend_switch(td, p); 126979799053SKonstantin Belousov remaining = calc_remaining(p, mode); 127044990b8cSJulian Elischer } 1271906ac69dSDavid Xu if (mode == SINGLE_EXIT) { 127291599697SJulian Elischer /* 12738626a0ddSKonstantin Belousov * Convert the process to an unthreaded process. The 12748626a0ddSKonstantin Belousov * SINGLE_EXIT is called by exit1() or execve(), in 12758626a0ddSKonstantin Belousov * both cases other threads must be retired. 
127691599697SJulian Elischer */ 12778626a0ddSKonstantin Belousov KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads")); 1278ed062c8dSJulian Elischer p->p_singlethread = NULL; 12798626a0ddSKonstantin Belousov p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS); 1280fd229b5bSKonstantin Belousov 1281fd229b5bSKonstantin Belousov /* 1282fd229b5bSKonstantin Belousov * Wait for any remaining threads to exit cpu_throw(). 1283fd229b5bSKonstantin Belousov */ 1284fd229b5bSKonstantin Belousov while (p->p_exitthreads != 0) { 1285fd229b5bSKonstantin Belousov PROC_SUNLOCK(p); 1286fd229b5bSKonstantin Belousov PROC_UNLOCK(p); 1287fd229b5bSKonstantin Belousov sched_relinquish(td); 1288fd229b5bSKonstantin Belousov PROC_LOCK(p); 1289fd229b5bSKonstantin Belousov PROC_SLOCK(p); 1290fd229b5bSKonstantin Belousov } 1291ac437c07SKonstantin Belousov } else if (mode == SINGLE_BOUNDARY) { 1292ac437c07SKonstantin Belousov /* 1293ac437c07SKonstantin Belousov * Wait until all suspended threads are removed from 1294ac437c07SKonstantin Belousov * the processors. The thread_suspend_check() 1295ac437c07SKonstantin Belousov * increments p_boundary_count while it is still 1296ac437c07SKonstantin Belousov * running, which makes it possible for the execve() 1297ac437c07SKonstantin Belousov * to destroy vmspace while our other threads are 1298ac437c07SKonstantin Belousov * still using the address space. 1299ac437c07SKonstantin Belousov * 1300ac437c07SKonstantin Belousov * We lock the thread, which is only allowed to 1301ac437c07SKonstantin Belousov * succeed after context switch code finished using 1302ac437c07SKonstantin Belousov * the address space. 
1303ac437c07SKonstantin Belousov */ 1304ac437c07SKonstantin Belousov FOREACH_THREAD_IN_PROC(p, td2) { 1305ac437c07SKonstantin Belousov if (td2 == td) 1306ac437c07SKonstantin Belousov continue; 1307ac437c07SKonstantin Belousov thread_lock(td2); 1308ac437c07SKonstantin Belousov KASSERT((td2->td_flags & TDF_BOUNDARY) != 0, 1309ac437c07SKonstantin Belousov ("td %p not on boundary", td2)); 1310ac437c07SKonstantin Belousov KASSERT(TD_IS_SUSPENDED(td2), 1311ac437c07SKonstantin Belousov ("td %p is not suspended", td2)); 1312ac437c07SKonstantin Belousov thread_unlock(td2); 1313ac437c07SKonstantin Belousov } 131491599697SJulian Elischer } 13157b4a950aSDavid Xu PROC_SUNLOCK(p); 131644990b8cSJulian Elischer return (0); 131744990b8cSJulian Elischer } 131844990b8cSJulian Elischer 13198638fe7bSKonstantin Belousov bool 13208638fe7bSKonstantin Belousov thread_suspend_check_needed(void) 13218638fe7bSKonstantin Belousov { 13228638fe7bSKonstantin Belousov struct proc *p; 13238638fe7bSKonstantin Belousov struct thread *td; 13248638fe7bSKonstantin Belousov 13258638fe7bSKonstantin Belousov td = curthread; 13268638fe7bSKonstantin Belousov p = td->td_proc; 13278638fe7bSKonstantin Belousov PROC_LOCK_ASSERT(p, MA_OWNED); 13288638fe7bSKonstantin Belousov return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 && 13298638fe7bSKonstantin Belousov (td->td_dbgflags & TDB_SUSPEND) != 0)); 13308638fe7bSKonstantin Belousov } 13318638fe7bSKonstantin Belousov 133244990b8cSJulian Elischer /* 133344990b8cSJulian Elischer * Called in from locations that can safely check to see 133444990b8cSJulian Elischer * whether we have to suspend or at least throttle for a 133544990b8cSJulian Elischer * single-thread event (e.g. fork). 133644990b8cSJulian Elischer * 133744990b8cSJulian Elischer * Such locations include userret(). 
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
136444990b8cSJulian Elischer */ 136544990b8cSJulian Elischer int 136644990b8cSJulian Elischer thread_suspend_check(int return_instead) 136744990b8cSJulian Elischer { 1368ecafb24bSJuli Mallett struct thread *td; 1369ecafb24bSJuli Mallett struct proc *p; 137046e47c4fSKonstantin Belousov int wakeup_swapper; 137144990b8cSJulian Elischer 137244990b8cSJulian Elischer td = curthread; 137344990b8cSJulian Elischer p = td->td_proc; 137437814395SPeter Wemm mtx_assert(&Giant, MA_NOTOWNED); 137544990b8cSJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 13768638fe7bSKonstantin Belousov while (thread_suspend_check_needed()) { 13771279572aSDavid Xu if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 137844990b8cSJulian Elischer KASSERT(p->p_singlethread != NULL, 137944990b8cSJulian Elischer ("singlethread not set")); 138044990b8cSJulian Elischer /* 1381e3b9bf71SJulian Elischer * The only suspension in action is a 1382e3b9bf71SJulian Elischer * single-threading. Single threader need not stop. 1383bd07998eSKonstantin Belousov * It is safe to access p->p_singlethread unlocked 1384bd07998eSKonstantin Belousov * because it can only be set to our address by us. 138544990b8cSJulian Elischer */ 1386e3b9bf71SJulian Elischer if (p->p_singlethread == td) 138744990b8cSJulian Elischer return (0); /* Exempt from stopping. */ 138844990b8cSJulian Elischer } 138945a4bfa1SDavid Xu if ((p->p_flag & P_SINGLE_EXIT) && return_instead) 139094f0972bSDavid Xu return (EINTR); 139144990b8cSJulian Elischer 1392906ac69dSDavid Xu /* Should we goto user boundary if we didn't come from there? */ 1393906ac69dSDavid Xu if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && 1394906ac69dSDavid Xu (p->p_flag & P_SINGLE_BOUNDARY) && return_instead) 139594f0972bSDavid Xu return (ERESTART); 1396906ac69dSDavid Xu 139744990b8cSJulian Elischer /* 13983077f938SKonstantin Belousov * Ignore suspend requests if they are deferred. 
1399d071a6faSJohn Baldwin */ 14003077f938SKonstantin Belousov if ((td->td_flags & TDF_SBDRY) != 0) { 1401d071a6faSJohn Baldwin KASSERT(return_instead, 1402d071a6faSJohn Baldwin ("TDF_SBDRY set for unsafe thread_suspend_check")); 140346e47c4fSKonstantin Belousov KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 140446e47c4fSKonstantin Belousov (TDF_SEINTR | TDF_SERESTART), 140546e47c4fSKonstantin Belousov ("both TDF_SEINTR and TDF_SERESTART")); 140646e47c4fSKonstantin Belousov return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0); 1407d071a6faSJohn Baldwin } 1408d071a6faSJohn Baldwin 1409d071a6faSJohn Baldwin /* 141044990b8cSJulian Elischer * If the process is waiting for us to exit, 141144990b8cSJulian Elischer * this thread should just suicide. 14121279572aSDavid Xu * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. 141344990b8cSJulian Elischer */ 1414cf7d9a8cSDavid Xu if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { 1415cf7d9a8cSDavid Xu PROC_UNLOCK(p); 141691d1786fSDmitry Chagin 141791d1786fSDmitry Chagin /* 141891d1786fSDmitry Chagin * Allow Linux emulation layer to do some work 141991d1786fSDmitry Chagin * before thread suicide. 
142091d1786fSDmitry Chagin */ 142191d1786fSDmitry Chagin if (__predict_false(p->p_sysent->sv_thread_detach != NULL)) 142291d1786fSDmitry Chagin (p->p_sysent->sv_thread_detach)(td); 14232a339d9eSKonstantin Belousov umtx_thread_exit(td); 1424d1e7a4a5SJohn Baldwin kern_thr_exit(td); 1425d1e7a4a5SJohn Baldwin panic("stopped thread did not exit"); 1426cf7d9a8cSDavid Xu } 142721ecd1e9SDavid Xu 142821ecd1e9SDavid Xu PROC_SLOCK(p); 142921ecd1e9SDavid Xu thread_stopped(p); 1430a54e85fdSJeff Roberson if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 1431a54e85fdSJeff Roberson if (p->p_numthreads == p->p_suspcount + 1) { 1432a54e85fdSJeff Roberson thread_lock(p->p_singlethread); 143384cdea97SKonstantin Belousov wakeup_swapper = thread_unsuspend_one( 143484cdea97SKonstantin Belousov p->p_singlethread, p, false); 14357847a9daSJohn Baldwin if (wakeup_swapper) 14367847a9daSJohn Baldwin kick_proc0(); 1437a54e85fdSJeff Roberson } 1438a54e85fdSJeff Roberson } 14393f9be10eSDavid Xu PROC_UNLOCK(p); 14407b4a950aSDavid Xu thread_lock(td); 144144990b8cSJulian Elischer /* 144244990b8cSJulian Elischer * When a thread suspends, it just 1443ad1e7d28SJulian Elischer * gets taken off all queues. 144444990b8cSJulian Elischer */ 144571fad9fdSJulian Elischer thread_suspend_one(td); 1446906ac69dSDavid Xu if (return_instead == 0) { 1447906ac69dSDavid Xu p->p_boundary_count++; 1448906ac69dSDavid Xu td->td_flags |= TDF_BOUNDARY; 1449cf19bf91SJulian Elischer } 14507b4a950aSDavid Xu PROC_SUNLOCK(p); 1451686bcb5cSJeff Roberson mi_switch(SW_INVOL | SWT_SUSPEND); 145244990b8cSJulian Elischer PROC_LOCK(p); 145344990b8cSJulian Elischer } 145444990b8cSJulian Elischer return (0); 145544990b8cSJulian Elischer } 145644990b8cSJulian Elischer 1457478ca4b0SKonstantin Belousov /* 1458478ca4b0SKonstantin Belousov * Check for possible stops and suspensions while executing a 1459478ca4b0SKonstantin Belousov * casueword or similar transiently failing operation. 
1460478ca4b0SKonstantin Belousov * 1461478ca4b0SKonstantin Belousov * The sleep argument controls whether the function can handle a stop 1462478ca4b0SKonstantin Belousov * request itself or it should return ERESTART and the request is 1463478ca4b0SKonstantin Belousov * proceed at the kernel/user boundary in ast. 1464478ca4b0SKonstantin Belousov * 1465478ca4b0SKonstantin Belousov * Typically, when retrying due to casueword(9) failure (rv == 1), we 1466478ca4b0SKonstantin Belousov * should handle the stop requests there, with exception of cases when 1467478ca4b0SKonstantin Belousov * the thread owns a kernel resource, for instance busied the umtx 1468300b525dSKonstantin Belousov * key, or when functions return immediately if thread_check_susp() 1469478ca4b0SKonstantin Belousov * returned non-zero. On the other hand, retrying the whole lock 1470478ca4b0SKonstantin Belousov * operation, we better not stop there but delegate the handling to 1471478ca4b0SKonstantin Belousov * ast. 1472478ca4b0SKonstantin Belousov * 1473478ca4b0SKonstantin Belousov * If the request is for thread termination P_SINGLE_EXIT, we cannot 1474478ca4b0SKonstantin Belousov * handle it at all, and simply return EINTR. 1475478ca4b0SKonstantin Belousov */ 1476478ca4b0SKonstantin Belousov int 1477478ca4b0SKonstantin Belousov thread_check_susp(struct thread *td, bool sleep) 1478478ca4b0SKonstantin Belousov { 1479478ca4b0SKonstantin Belousov struct proc *p; 1480478ca4b0SKonstantin Belousov int error; 1481478ca4b0SKonstantin Belousov 1482478ca4b0SKonstantin Belousov /* 1483478ca4b0SKonstantin Belousov * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 1484478ca4b0SKonstantin Belousov * eventually break the lockstep loop. 
1485478ca4b0SKonstantin Belousov */ 1486478ca4b0SKonstantin Belousov if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 1487478ca4b0SKonstantin Belousov return (0); 1488478ca4b0SKonstantin Belousov error = 0; 1489478ca4b0SKonstantin Belousov p = td->td_proc; 1490478ca4b0SKonstantin Belousov PROC_LOCK(p); 1491478ca4b0SKonstantin Belousov if (p->p_flag & P_SINGLE_EXIT) 1492478ca4b0SKonstantin Belousov error = EINTR; 1493478ca4b0SKonstantin Belousov else if (P_SHOULDSTOP(p) || 1494478ca4b0SKonstantin Belousov ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) 1495478ca4b0SKonstantin Belousov error = sleep ? thread_suspend_check(0) : ERESTART; 1496478ca4b0SKonstantin Belousov PROC_UNLOCK(p); 1497478ca4b0SKonstantin Belousov return (error); 1498478ca4b0SKonstantin Belousov } 1499478ca4b0SKonstantin Belousov 150035c32a76SDavid Xu void 15016ddcc233SKonstantin Belousov thread_suspend_switch(struct thread *td, struct proc *p) 1502a54e85fdSJeff Roberson { 1503a54e85fdSJeff Roberson 1504a54e85fdSJeff Roberson KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); 1505a54e85fdSJeff Roberson PROC_LOCK_ASSERT(p, MA_OWNED); 15067b4a950aSDavid Xu PROC_SLOCK_ASSERT(p, MA_OWNED); 1507a54e85fdSJeff Roberson /* 1508a54e85fdSJeff Roberson * We implement thread_suspend_one in stages here to avoid 1509a54e85fdSJeff Roberson * dropping the proc lock while the thread lock is owned. 
1510a54e85fdSJeff Roberson */ 15116ddcc233SKonstantin Belousov if (p == td->td_proc) { 1512a54e85fdSJeff Roberson thread_stopped(p); 1513a54e85fdSJeff Roberson p->p_suspcount++; 15146ddcc233SKonstantin Belousov } 15153f9be10eSDavid Xu PROC_UNLOCK(p); 15167b4a950aSDavid Xu thread_lock(td); 1517b7edba77SJeff Roberson td->td_flags &= ~TDF_NEEDSUSPCHK; 1518a54e85fdSJeff Roberson TD_SET_SUSPENDED(td); 1519c5aa6b58SJeff Roberson sched_sleep(td, 0); 15207b4a950aSDavid Xu PROC_SUNLOCK(p); 1521a54e85fdSJeff Roberson DROP_GIANT(); 1522686bcb5cSJeff Roberson mi_switch(SW_VOL | SWT_SUSPEND); 1523a54e85fdSJeff Roberson PICKUP_GIANT(); 1524a54e85fdSJeff Roberson PROC_LOCK(p); 15257b4a950aSDavid Xu PROC_SLOCK(p); 1526a54e85fdSJeff Roberson } 1527a54e85fdSJeff Roberson 1528a54e85fdSJeff Roberson void 152935c32a76SDavid Xu thread_suspend_one(struct thread *td) 153035c32a76SDavid Xu { 15316ddcc233SKonstantin Belousov struct proc *p; 153235c32a76SDavid Xu 15336ddcc233SKonstantin Belousov p = td->td_proc; 15347b4a950aSDavid Xu PROC_SLOCK_ASSERT(p, MA_OWNED); 1535a54e85fdSJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1536e574e444SDavid Xu KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); 153735c32a76SDavid Xu p->p_suspcount++; 1538b7edba77SJeff Roberson td->td_flags &= ~TDF_NEEDSUSPCHK; 153971fad9fdSJulian Elischer TD_SET_SUSPENDED(td); 1540c5aa6b58SJeff Roberson sched_sleep(td, 0); 154135c32a76SDavid Xu } 154235c32a76SDavid Xu 154384cdea97SKonstantin Belousov static int 154484cdea97SKonstantin Belousov thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary) 154535c32a76SDavid Xu { 154635c32a76SDavid Xu 1547a54e85fdSJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1548ad1e7d28SJulian Elischer KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended")); 154971fad9fdSJulian Elischer TD_CLR_SUSPENDED(td); 15506ddcc233SKonstantin Belousov td->td_flags &= ~TDF_ALLPROCSUSP; 15516ddcc233SKonstantin Belousov if (td->td_proc == p) { 15526ddcc233SKonstantin Belousov 
PROC_SLOCK_ASSERT(p, MA_OWNED); 155335c32a76SDavid Xu p->p_suspcount--; 155484cdea97SKonstantin Belousov if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) { 155584cdea97SKonstantin Belousov td->td_flags &= ~TDF_BOUNDARY; 155684cdea97SKonstantin Belousov p->p_boundary_count--; 155784cdea97SKonstantin Belousov } 15586ddcc233SKonstantin Belousov } 155961a74c5cSJeff Roberson return (setrunnable(td, 0)); 156035c32a76SDavid Xu } 156135c32a76SDavid Xu 1562af928fdeSKonstantin Belousov void 1563af928fdeSKonstantin Belousov thread_run_flash(struct thread *td) 1564af928fdeSKonstantin Belousov { 1565af928fdeSKonstantin Belousov struct proc *p; 1566af928fdeSKonstantin Belousov 1567af928fdeSKonstantin Belousov p = td->td_proc; 1568af928fdeSKonstantin Belousov PROC_LOCK_ASSERT(p, MA_OWNED); 1569af928fdeSKonstantin Belousov 1570af928fdeSKonstantin Belousov if (TD_ON_SLEEPQ(td)) 1571af928fdeSKonstantin Belousov sleepq_remove_nested(td); 1572af928fdeSKonstantin Belousov else 1573af928fdeSKonstantin Belousov thread_lock(td); 1574af928fdeSKonstantin Belousov 1575af928fdeSKonstantin Belousov THREAD_LOCK_ASSERT(td, MA_OWNED); 1576af928fdeSKonstantin Belousov KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended")); 1577af928fdeSKonstantin Belousov 1578af928fdeSKonstantin Belousov TD_CLR_SUSPENDED(td); 1579af928fdeSKonstantin Belousov PROC_SLOCK(p); 1580af928fdeSKonstantin Belousov MPASS(p->p_suspcount > 0); 1581af928fdeSKonstantin Belousov p->p_suspcount--; 1582af928fdeSKonstantin Belousov PROC_SUNLOCK(p); 1583af928fdeSKonstantin Belousov if (setrunnable(td, 0)) 1584af928fdeSKonstantin Belousov kick_proc0(); 1585af928fdeSKonstantin Belousov } 1586af928fdeSKonstantin Belousov 158744990b8cSJulian Elischer /* 158844990b8cSJulian Elischer * Allow all threads blocked by single threading to continue running. 
158944990b8cSJulian Elischer */ 159044990b8cSJulian Elischer void 159144990b8cSJulian Elischer thread_unsuspend(struct proc *p) 159244990b8cSJulian Elischer { 159344990b8cSJulian Elischer struct thread *td; 15947847a9daSJohn Baldwin int wakeup_swapper; 159544990b8cSJulian Elischer 159644990b8cSJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 15977b4a950aSDavid Xu PROC_SLOCK_ASSERT(p, MA_OWNED); 15987847a9daSJohn Baldwin wakeup_swapper = 0; 159944990b8cSJulian Elischer if (!P_SHOULDSTOP(p)) { 1600ad1e7d28SJulian Elischer FOREACH_THREAD_IN_PROC(p, td) { 1601a54e85fdSJeff Roberson thread_lock(td); 1602ad1e7d28SJulian Elischer if (TD_IS_SUSPENDED(td)) { 160384cdea97SKonstantin Belousov wakeup_swapper |= thread_unsuspend_one(td, p, 160484cdea97SKonstantin Belousov true); 160561a74c5cSJeff Roberson } else 1606a54e85fdSJeff Roberson thread_unlock(td); 1607ad1e7d28SJulian Elischer } 160884cdea97SKonstantin Belousov } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && 160984cdea97SKonstantin Belousov p->p_numthreads == p->p_suspcount) { 161044990b8cSJulian Elischer /* 161144990b8cSJulian Elischer * Stopping everything also did the job for the single 161244990b8cSJulian Elischer * threading request. Now we've downgraded to single-threaded, 161344990b8cSJulian Elischer * let it continue. 161444990b8cSJulian Elischer */ 16156ddcc233SKonstantin Belousov if (p->p_singlethread->td_proc == p) { 1616a54e85fdSJeff Roberson thread_lock(p->p_singlethread); 16176ddcc233SKonstantin Belousov wakeup_swapper = thread_unsuspend_one( 161884cdea97SKonstantin Belousov p->p_singlethread, p, false); 161944990b8cSJulian Elischer } 16206ddcc233SKonstantin Belousov } 16217847a9daSJohn Baldwin if (wakeup_swapper) 16227847a9daSJohn Baldwin kick_proc0(); 162344990b8cSJulian Elischer } 162444990b8cSJulian Elischer 1625ed062c8dSJulian Elischer /* 1626ed062c8dSJulian Elischer * End the single threading mode.. 
1627ed062c8dSJulian Elischer */ 162844990b8cSJulian Elischer void 16296ddcc233SKonstantin Belousov thread_single_end(struct proc *p, int mode) 163044990b8cSJulian Elischer { 163144990b8cSJulian Elischer struct thread *td; 16327847a9daSJohn Baldwin int wakeup_swapper; 163344990b8cSJulian Elischer 16346ddcc233SKonstantin Belousov KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY || 16356ddcc233SKonstantin Belousov mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT, 16366ddcc233SKonstantin Belousov ("invalid mode %d", mode)); 163744990b8cSJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 16386ddcc233SKonstantin Belousov KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) || 16396ddcc233SKonstantin Belousov (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0), 16406ddcc233SKonstantin Belousov ("mode %d does not match P_TOTAL_STOP", mode)); 164184cdea97SKonstantin Belousov KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread, 164284cdea97SKonstantin Belousov ("thread_single_end from other thread %p %p", 164384cdea97SKonstantin Belousov curthread, p->p_singlethread)); 164484cdea97SKonstantin Belousov KASSERT(mode != SINGLE_BOUNDARY || 164584cdea97SKonstantin Belousov (p->p_flag & P_SINGLE_BOUNDARY) != 0, 164684cdea97SKonstantin Belousov ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag)); 16476ddcc233SKonstantin Belousov p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY | 16486ddcc233SKonstantin Belousov P_TOTAL_STOP); 16497b4a950aSDavid Xu PROC_SLOCK(p); 165044990b8cSJulian Elischer p->p_singlethread = NULL; 16517847a9daSJohn Baldwin wakeup_swapper = 0; 165249539972SJulian Elischer /* 16537847a9daSJohn Baldwin * If there are other threads they may now run, 165449539972SJulian Elischer * unless of course there is a blanket 'stop order' 165549539972SJulian Elischer * on the process. The single threader must be allowed 165649539972SJulian Elischer * to continue however as this is a bad place to stop. 
165749539972SJulian Elischer */ 16586ddcc233SKonstantin Belousov if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) { 1659ad1e7d28SJulian Elischer FOREACH_THREAD_IN_PROC(p, td) { 1660a54e85fdSJeff Roberson thread_lock(td); 1661ad1e7d28SJulian Elischer if (TD_IS_SUSPENDED(td)) { 166284cdea97SKonstantin Belousov wakeup_swapper |= thread_unsuspend_one(td, p, 166384cdea97SKonstantin Belousov mode == SINGLE_BOUNDARY); 166461a74c5cSJeff Roberson } else 1665a54e85fdSJeff Roberson thread_unlock(td); 166649539972SJulian Elischer } 1667ad1e7d28SJulian Elischer } 166884cdea97SKonstantin Belousov KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0, 166984cdea97SKonstantin Belousov ("inconsistent boundary count %d", p->p_boundary_count)); 16707b4a950aSDavid Xu PROC_SUNLOCK(p); 16717847a9daSJohn Baldwin if (wakeup_swapper) 16727847a9daSJohn Baldwin kick_proc0(); 167349539972SJulian Elischer } 16744fc21c09SDaniel Eischen 1675aae3547bSMateusz Guzik /* 1676aae3547bSMateusz Guzik * Locate a thread by number and return with proc lock held. 1677aae3547bSMateusz Guzik * 1678aae3547bSMateusz Guzik * thread exit establishes proc -> tidhash lock ordering, but lookup 1679aae3547bSMateusz Guzik * takes tidhash first and needs to return locked proc. 
1680aae3547bSMateusz Guzik * 1681aae3547bSMateusz Guzik * The problem is worked around by relying on type-safety of both 1682aae3547bSMateusz Guzik * structures and doing the work in 2 steps: 1683aae3547bSMateusz Guzik * - tidhash-locked lookup which saves both thread and proc pointers 1684aae3547bSMateusz Guzik * - proc-locked verification that the found thread still matches 1685aae3547bSMateusz Guzik */ 1686aae3547bSMateusz Guzik static bool 1687aae3547bSMateusz Guzik tdfind_hash(lwpid_t tid, pid_t pid, struct proc **pp, struct thread **tdp) 1688cf7d9a8cSDavid Xu { 1689cf7d9a8cSDavid Xu #define RUN_THRESH 16 1690aae3547bSMateusz Guzik struct proc *p; 1691cf7d9a8cSDavid Xu struct thread *td; 1692aae3547bSMateusz Guzik int run; 1693aae3547bSMateusz Guzik bool locked; 1694cf7d9a8cSDavid Xu 1695aae3547bSMateusz Guzik run = 0; 169626007fe3SMateusz Guzik rw_rlock(TIDHASHLOCK(tid)); 1697aae3547bSMateusz Guzik locked = true; 1698cf7d9a8cSDavid Xu LIST_FOREACH(td, TIDHASH(tid), td_hash) { 1699aae3547bSMateusz Guzik if (td->td_tid != tid) { 1700aae3547bSMateusz Guzik run++; 1701aae3547bSMateusz Guzik continue; 1702cf7d9a8cSDavid Xu } 1703aae3547bSMateusz Guzik p = td->td_proc; 1704aae3547bSMateusz Guzik if (pid != -1 && p->p_pid != pid) { 1705cf7d9a8cSDavid Xu td = NULL; 1706cf7d9a8cSDavid Xu break; 1707cf7d9a8cSDavid Xu } 1708cf7d9a8cSDavid Xu if (run > RUN_THRESH) { 170926007fe3SMateusz Guzik if (rw_try_upgrade(TIDHASHLOCK(tid))) { 1710cf7d9a8cSDavid Xu LIST_REMOVE(td, td_hash); 1711cf7d9a8cSDavid Xu LIST_INSERT_HEAD(TIDHASH(td->td_tid), 1712cf7d9a8cSDavid Xu td, td_hash); 171326007fe3SMateusz Guzik rw_wunlock(TIDHASHLOCK(tid)); 1714aae3547bSMateusz Guzik locked = false; 1715aae3547bSMateusz Guzik break; 1716cf7d9a8cSDavid Xu } 1717cf7d9a8cSDavid Xu } 1718cf7d9a8cSDavid Xu break; 1719cf7d9a8cSDavid Xu } 1720aae3547bSMateusz Guzik if (locked) 172126007fe3SMateusz Guzik rw_runlock(TIDHASHLOCK(tid)); 1722aae3547bSMateusz Guzik if (td == NULL) 1723aae3547bSMateusz Guzik 
return (false); 1724aae3547bSMateusz Guzik *pp = p; 1725aae3547bSMateusz Guzik *tdp = td; 1726aae3547bSMateusz Guzik return (true); 1727aae3547bSMateusz Guzik } 1728aae3547bSMateusz Guzik 1729aae3547bSMateusz Guzik struct thread * 1730aae3547bSMateusz Guzik tdfind(lwpid_t tid, pid_t pid) 1731aae3547bSMateusz Guzik { 1732aae3547bSMateusz Guzik struct proc *p; 1733aae3547bSMateusz Guzik struct thread *td; 1734aae3547bSMateusz Guzik 1735aae3547bSMateusz Guzik td = curthread; 1736aae3547bSMateusz Guzik if (td->td_tid == tid) { 1737aae3547bSMateusz Guzik if (pid != -1 && td->td_proc->p_pid != pid) 1738aae3547bSMateusz Guzik return (NULL); 1739aae3547bSMateusz Guzik PROC_LOCK(td->td_proc); 1740cf7d9a8cSDavid Xu return (td); 1741cf7d9a8cSDavid Xu } 1742cf7d9a8cSDavid Xu 1743aae3547bSMateusz Guzik for (;;) { 1744aae3547bSMateusz Guzik if (!tdfind_hash(tid, pid, &p, &td)) 1745aae3547bSMateusz Guzik return (NULL); 1746aae3547bSMateusz Guzik PROC_LOCK(p); 1747aae3547bSMateusz Guzik if (td->td_tid != tid) { 1748aae3547bSMateusz Guzik PROC_UNLOCK(p); 1749aae3547bSMateusz Guzik continue; 1750aae3547bSMateusz Guzik } 1751aae3547bSMateusz Guzik if (td->td_proc != p) { 1752aae3547bSMateusz Guzik PROC_UNLOCK(p); 1753aae3547bSMateusz Guzik continue; 1754aae3547bSMateusz Guzik } 1755aae3547bSMateusz Guzik if (p->p_state == PRS_NEW) { 1756aae3547bSMateusz Guzik PROC_UNLOCK(p); 1757aae3547bSMateusz Guzik return (NULL); 1758aae3547bSMateusz Guzik } 1759aae3547bSMateusz Guzik return (td); 1760aae3547bSMateusz Guzik } 1761aae3547bSMateusz Guzik } 1762aae3547bSMateusz Guzik 1763cf7d9a8cSDavid Xu void 1764cf7d9a8cSDavid Xu tidhash_add(struct thread *td) 1765cf7d9a8cSDavid Xu { 176626007fe3SMateusz Guzik rw_wlock(TIDHASHLOCK(td->td_tid)); 1767cf7d9a8cSDavid Xu LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); 176826007fe3SMateusz Guzik rw_wunlock(TIDHASHLOCK(td->td_tid)); 1769cf7d9a8cSDavid Xu } 1770cf7d9a8cSDavid Xu 1771cf7d9a8cSDavid Xu void 1772cf7d9a8cSDavid Xu tidhash_remove(struct 
thread *td) 1773cf7d9a8cSDavid Xu { 177426007fe3SMateusz Guzik 177526007fe3SMateusz Guzik rw_wlock(TIDHASHLOCK(td->td_tid)); 1776cf7d9a8cSDavid Xu LIST_REMOVE(td, td_hash); 177726007fe3SMateusz Guzik rw_wunlock(TIDHASHLOCK(td->td_tid)); 1778cf7d9a8cSDavid Xu } 1779