/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_witness.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/bitstring.h>
#include <sys/epoch.h>
#include <sys/rangelock.h>
#include <sys/resourcevar.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/selinfo.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/turnstile.h>
#include <sys/taskqueue.h>
#include <sys/ktr.h>
#include <sys/rwlock.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#include <sys/cpuset.h>
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
#include <sys/priv.h>

#include <security/audit/audit.h>

#include <vm/pmap.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/vm_phys.h>
#include <sys/eventhandler.h>

/*
 * Asserts below verify the stability of struct thread and struct proc
 * layout, as exposed by KBI to modules.  On head, the KBI is allowed
 * to drift, but changes to the structures must be accompanied by the
 * corresponding assert updates.
 *
 * On the stable branches after KBI freeze, the conditions must not be
 * violated.  Typically new fields are moved to the end of the
 * structures.
 */
#ifdef __amd64__
_Static_assert(offsetof(struct thread, td_flags) == 0xfc,
    "struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0x104,
    "struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x4a0,
    "struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
    "struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb8,
    "struct proc KBI p_flag");
_Static_assert(offsetof(struct proc, p_pid) == 0xc4,
    "struct proc KBI p_pid");
_Static_assert(offsetof(struct proc, p_filemon) == 0x3c0,
    "struct proc KBI p_filemon");
_Static_assert(offsetof(struct proc, p_comm) == 0x3d8,
    "struct proc KBI p_comm");
_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8,
    "struct proc KBI p_emuldata");
#endif
#ifdef __i386__
_Static_assert(offsetof(struct thread, td_flags) == 0x98,
    "struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
    "struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x300,
    "struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x344,
    "struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x6c,
    "struct proc KBI p_flag");
_Static_assert(offsetof(struct proc, p_pid) == 0x78,
    "struct proc KBI p_pid");
_Static_assert(offsetof(struct proc, p_filemon) == 0x26c,
    "struct proc KBI p_filemon");
_Static_assert(offsetof(struct proc, p_comm) == 0x280,
    "struct proc KBI p_comm");
_Static_assert(offsetof(struct proc, p_emuldata) == 0x30c,
    "struct proc KBI p_emuldata");
#endif

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, , , lwp__exit);

/*
 * thread related storage.
 */
static uma_zone_t thread_zone;

struct thread_domain_data {
	struct thread	*tdd_zombies;
	int		tdd_reapticks;
} __aligned(CACHE_LINE_SIZE);

static struct thread_domain_data thread_domain_data[MAXMEMDOM];

static struct task	thread_reap_task;
static struct callout	thread_reap_callout;

static void thread_zombie(struct thread *);
static void thread_reap_all(void);
static void thread_reap_task_cb(void *, int);
static void thread_reap_callout_cb(void *);
static int thread_unsuspend_one(struct thread *td, struct proc *p,
    bool boundary);
static void thread_free_batched(struct thread *td);

static __exclusive_cache_line struct mtx tid_lock;
static bitstr_t *tid_bitmap;

static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");

static int maxthread;
SYSCTL_INT(_kern, OID_AUTO, maxthread, CTLFLAG_RDTUN,
    &maxthread, 0, "Maximum number of threads");

static __exclusive_cache_line int nthreads;

static LIST_HEAD(tidhashhead, thread) *tidhashtbl;
static u_long	tidhash;
static u_long	tidhashlock;
static struct	rwlock *tidhashtbl_lock;
#define	TIDHASH(tid)		(&tidhashtbl[(tid) & tidhash])
#define	TIDHASHLOCK(tid)	(&tidhashtbl_lock[(tid) & tidhashlock])

EVENTHANDLER_LIST_DEFINE(thread_ctor);
EVENTHANDLER_LIST_DEFINE(thread_dtor);
EVENTHANDLER_LIST_DEFINE(thread_init);
EVENTHANDLER_LIST_DEFINE(thread_fini);

static bool
thread_count_inc_try(void)
{
	int nthreads_new;

	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
	if (nthreads_new >= maxthread - 100) {
		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
		    nthreads_new >= maxthread) {
			atomic_subtract_int(&nthreads, 1);
			return (false);
		}
	}
	return (true);
}

static bool
thread_count_inc(void)
{
	static struct timeval lastfail;
	static int curfail;

	thread_reap();
	if (thread_count_inc_try()) {
		return (true);
	}

	thread_reap_all();
	if (thread_count_inc_try()) {
		return (true);
	}

	if (ppsratecheck(&lastfail, &curfail, 1)) {
		printf("maxthread limit exceeded by uid %u "
		    "(pid %d); consider increasing kern.maxthread\n",
		    curthread->td_ucred->cr_ruid, curproc->p_pid);
	}
	return (false);
}
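/*
 * Note on the accounting above: nthreads is maintained with bare atomics
 * and is only advisory.  thread_count_inc_try() keeps the last 100 slots
 * below maxthread in reserve for callers that pass the PRIV_MAXPROC
 * check, and thread_count_inc() first reaps the local domain, then all
 * domains, before giving up and printing the rate-limited warning.
 */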
static void
thread_count_sub(int n)
{

	atomic_subtract_int(&nthreads, n);
}

static void
thread_count_dec(void)
{

	thread_count_sub(1);
}

static lwpid_t
tid_alloc(void)
{
	static lwpid_t trytid;
	lwpid_t tid;

	mtx_lock(&tid_lock);
	/*
	 * It is an invariant that the bitmap is big enough to hold maxthread
	 * IDs.  If we got to this point there has to be at least one free.
	 */
	if (trytid >= maxthread)
		trytid = 0;
	bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
	if (tid == -1) {
		KASSERT(trytid != 0, ("unexpectedly ran out of IDs"));
		trytid = 0;
		bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
		KASSERT(tid != -1, ("unexpectedly ran out of IDs"));
	}
	bit_set(tid_bitmap, tid);
	trytid = tid + 1;
	mtx_unlock(&tid_lock);
	return (tid + NO_PID);
}

static void
tid_free_locked(lwpid_t rtid)
{
	lwpid_t tid;

	mtx_assert(&tid_lock, MA_OWNED);
	KASSERT(rtid >= NO_PID,
	    ("%s: invalid tid %d\n", __func__, rtid));
	tid = rtid - NO_PID;
	KASSERT(bit_test(tid_bitmap, tid) != 0,
	    ("thread ID %d not allocated\n", rtid));
	bit_clear(tid_bitmap, tid);
}

static void
tid_free(lwpid_t rtid)
{

	mtx_lock(&tid_lock);
	tid_free_locked(rtid);
	mtx_unlock(&tid_lock);
}

static void
tid_free_batch(lwpid_t *batch, int n)
{
	int i;

	mtx_lock(&tid_lock);
	for (i = 0; i < n; i++) {
		tid_free_locked(batch[i]);
	}
	mtx_unlock(&tid_lock);
}

/*
 * Batching for thread reaping.
 */
struct tidbatch {
	lwpid_t tab[16];
	int n;
};

static void
tidbatch_prep(struct tidbatch *tb)
{

	tb->n = 0;
}

static void
tidbatch_add(struct tidbatch *tb, struct thread *td)
{

	KASSERT(tb->n < nitems(tb->tab),
	    ("%s: count too high %d", __func__, tb->n));
	tb->tab[tb->n] = td->td_tid;
	tb->n++;
}

static void
tidbatch_process(struct tidbatch *tb)
{

	KASSERT(tb->n <= nitems(tb->tab),
	    ("%s: count too high %d", __func__, tb->n));
	if (tb->n == nitems(tb->tab)) {
		tid_free_batch(tb->tab, tb->n);
		tb->n = 0;
	}
}

static void
tidbatch_final(struct tidbatch *tb)
{

	KASSERT(tb->n <= nitems(tb->tab),
	    ("%s: count too high %d", __func__, tb->n));
	if (tb->n != 0) {
		tid_free_batch(tb->tab, tb->n);
	}
}
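/*
 * The batch size of 16 TIDs is a trade-off: it keeps struct tidbatch
 * small enough to live on the reaper's stack while amortizing tid_lock
 * acquisitions, so a long zombie list costs one lock round trip per 16
 * threads instead of one per thread.
 */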
/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_lastcpu = td->td_oncpu = NOCPU;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.
	 */
	td->td_critnest = 1;
	td->td_lend_user_pri = PRI_MAX;
#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	umtx_thread_alloc(td);
	MPASS(td->td_sel == NULL);
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	/* Free all OSD associated with this thread. */
	osd_thread_exit(td);
	td_softdep_cleanup(td);
	MPASS(td->td_su == NULL);
	seltdfini(td);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_rlqe = NULL;
	EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
	umtx_thread_init(td);
	td->td_kstack = 0;
	td->td_sel = NULL;
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
	rlqentry_free(td->td_rlqe);
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
	MPASS(td->td_sel == NULL);
}

/*
 * For a newly created process, link up all the structures and its
 * initial threads etc.
 * Called from:
 *	{arch}/{arch}/machdep.c	{arch}_init(), init386() etc.
 *	proc_dtor() (should go away)
 *	proc_init()
 */
void
proc_linkup0(struct proc *p, struct thread *td)
{
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	proc_linkup(p, td);
}

void
proc_linkup(struct proc *p, struct thread *td)
{

	sigqueue_init(&p->p_sigqueue, p);
	p->p_ksi = ksiginfo_alloc(1);
	if (p->p_ksi != NULL) {
		/* XXX p_ksi may be null if ksiginfo zone is not ready */
		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
	}
	LIST_INIT(&p->p_mqnotifier);
	p->p_numthreads = 0;
	thread_link(td, p);
}

extern int max_threads_per_proc;

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{
	u_long i;
	lwpid_t tid0;
	uint32_t flags;

	/*
	 * Place an upper limit on threads which can be allocated.
	 *
	 * Note that other factors may make the de facto limit much lower.
	 *
	 * Platform limits are somewhat arbitrary but deemed "more than good
	 * enough" for the foreseeable future.
	 */
	if (maxthread == 0) {
#ifdef _LP64
		maxthread = MIN(maxproc * max_threads_per_proc, 1000000);
#else
		maxthread = MIN(maxproc * max_threads_per_proc, 100000);
#endif
	}

	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
	tid_bitmap = bit_alloc(maxthread, M_TIDHASH, M_WAITOK);
	/*
	 * Handle thread0.
	 */
	thread_count_inc();
	tid0 = tid_alloc();
	if (tid0 != THREAD0_TID)
		panic("tid0 %d != %d\n", tid0, THREAD0_TID);

	flags = UMA_ZONE_NOFREE;
#ifdef __aarch64__
	/*
	 * Force thread structures to be allocated from the direct map.
	 * Otherwise, superpage promotions and demotions may temporarily
	 * invalidate thread structure mappings.  For most dynamically
	 * allocated structures this is not a problem, but translation
	 * faults cannot be handled without accessing curthread.
	 */
	flags |= UMA_ZONE_CONTIG;
#endif
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    32 - 1, flags);
	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
	tidhashlock = (tidhash + 1) / 64;
	if (tidhashlock > 0)
		tidhashlock--;
	tidhashtbl_lock = malloc(sizeof(*tidhashtbl_lock) * (tidhashlock + 1),
	    M_TIDHASH, M_WAITOK | M_ZERO);
	for (i = 0; i < tidhashlock + 1; i++)
		rw_init(&tidhashtbl_lock[i], "tidhash");

	TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
	callout_init(&thread_reap_callout, 1);
	callout_reset(&thread_reap_callout, 5 * hz,
	    thread_reap_callout_cb, NULL);
}

/*
 * Place an unused thread on the zombie list.
 */
void
thread_zombie(struct thread *td)
{
	struct thread_domain_data *tdd;
	struct thread *ztd;

	tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
	ztd = atomic_load_ptr(&tdd->tdd_zombies);
	for (;;) {
		td->td_zombie = ztd;
		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
		    (uintptr_t *)&ztd, (uintptr_t)td))
			break;
		continue;
	}
}

/*
 * Release a thread that has exited after cpu_throw().
 */
void
thread_stash(struct thread *td)
{
	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
	thread_zombie(td);
}

/*
 * Reap zombies from the passed domain.
 */
static void
thread_reap_domain(struct thread_domain_data *tdd)
{
	struct thread *itd, *ntd;
	struct tidbatch tidbatch;
	struct credbatch credbatch;
	int tdcount;
	struct plimit *lim;
	int limcount;

	/*
	 * Reading upfront is pessimal if followed by concurrent atomic_swap,
	 * but most of the time the list is empty.
	 */
	if (tdd->tdd_zombies == NULL)
		return;

	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
	    (uintptr_t)NULL);
	if (itd == NULL)
		return;

	/*
	 * Multiple CPUs can get here, the race is fine as ticks is only
	 * advisory.
	 */
	tdd->tdd_reapticks = ticks;

	tidbatch_prep(&tidbatch);
	credbatch_prep(&credbatch);
	tdcount = 0;
	lim = NULL;
	limcount = 0;

	while (itd != NULL) {
		ntd = itd->td_zombie;
		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
		tidbatch_add(&tidbatch, itd);
		credbatch_add(&credbatch, itd);
		MPASS(itd->td_limit != NULL);
		if (lim != itd->td_limit) {
			if (limcount != 0) {
				lim_freen(lim, limcount);
				limcount = 0;
			}
		}
		lim = itd->td_limit;
		limcount++;
		thread_free_batched(itd);
		tidbatch_process(&tidbatch);
		credbatch_process(&credbatch);
		tdcount++;
		if (tdcount == 32) {
			thread_count_sub(tdcount);
			tdcount = 0;
		}
		itd = ntd;
	}

	tidbatch_final(&tidbatch);
	credbatch_final(&credbatch);
	if (tdcount != 0) {
		thread_count_sub(tdcount);
	}
	MPASS(limcount != 0);
	lim_freen(lim, limcount);
}

/*
 * Reap zombies from all domains.
 */
static void
thread_reap_all(void)
{
	struct thread_domain_data *tdd;
	int i, domain;

	domain = PCPU_GET(domain);
	for (i = 0; i < vm_ndomains; i++) {
		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
		thread_reap_domain(tdd);
	}
}
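/*
 * Note that the sweep above starts at the current CPU's domain and only
 * then visits the remote ones, so zombies backed by local memory are
 * reaped first; this ordering is a locality preference, not a
 * correctness requirement.
 */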
/*
 * Reap zombies from the local domain.
 */
void
thread_reap(void)
{
	struct thread_domain_data *tdd;
	int domain;

	domain = PCPU_GET(domain);
	tdd = &thread_domain_data[domain];

	thread_reap_domain(tdd);
}

static void
thread_reap_task_cb(void *arg __unused, int pending __unused)
{

	thread_reap_all();
}

static void
thread_reap_callout_cb(void *arg __unused)
{
	struct thread_domain_data *tdd;
	int i, cticks, lticks;
	bool wantreap;

	wantreap = false;
	cticks = atomic_load_int(&ticks);
	for (i = 0; i < vm_ndomains; i++) {
		tdd = &thread_domain_data[i];
		lticks = tdd->tdd_reapticks;
		if (tdd->tdd_zombies != NULL &&
		    (u_int)(cticks - lticks) > 5 * hz) {
			wantreap = true;
			break;
		}
	}

	if (wantreap)
		taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
	callout_reset(&thread_reap_callout, 5 * hz,
	    thread_reap_callout_cb, NULL);
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(int pages)
{
	struct thread *td;
	lwpid_t tid;

	if (!thread_count_inc()) {
		return (NULL);
	}

	tid = tid_alloc();
	td = uma_zalloc(thread_zone, M_WAITOK);
	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
	if (!vm_thread_new(td, pages)) {
		uma_zfree(thread_zone, td);
		tid_free(tid);
		thread_count_dec();
		return (NULL);
	}
	td->td_tid = tid;
	cpu_thread_alloc(td);
	EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
	return (td);
}

int
thread_alloc_stack(struct thread *td, int pages)
{

	KASSERT(td->td_kstack == 0,
	    ("thread_alloc_stack called on a thread with kstack"));
	if (!vm_thread_new(td, pages))
		return (0);
	cpu_thread_alloc(td);
	return (1);
}
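/*
 * Illustrative use (a sketch, not lifted from any specific caller):
 * a consumer such as thr_new() or kthread_add() is expected to do
 * roughly
 *
 *	newtd = thread_alloc(0);
 *	if (newtd == NULL)
 *		return (ENOMEM);
 *	... copy state, cpu_copy_thread(), thread_link() ...
 *
 * and must undo the allocation with thread_free() on any error path
 * taken before the thread becomes visible to the rest of the system.
 */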
/*
 * Deallocate a thread.
 */
static void
thread_free_batched(struct thread *td)
{

	lock_profile_thread_exit(td);
	if (td->td_cpuset)
		cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_free(td);
	if (td->td_kstack != 0)
		vm_thread_dispose(td);
	callout_drain(&td->td_slpcallout);
	/*
	 * Freeing of the tid is handled by the caller.
	 */
	td->td_tid = -1;
	uma_zfree(thread_zone, td);
}

void
thread_free(struct thread *td)
{
	lwpid_t tid;

	EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
	tid = td->td_tid;
	thread_free_batched(td);
	tid_free(tid);
	thread_count_dec();
}

void
thread_cow_get_proc(struct thread *newtd, struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	newtd->td_realucred = crcowget(p->p_ucred);
	newtd->td_ucred = newtd->td_realucred;
	newtd->td_limit = lim_hold(p->p_limit);
	newtd->td_cowgen = p->p_cowgen;
}

void
thread_cow_get(struct thread *newtd, struct thread *td)
{

	MPASS(td->td_realucred == td->td_ucred);
	newtd->td_realucred = crcowget(td->td_realucred);
	newtd->td_ucred = newtd->td_realucred;
	newtd->td_limit = lim_hold(td->td_limit);
	newtd->td_cowgen = td->td_cowgen;
}

void
thread_cow_free(struct thread *td)
{

	if (td->td_realucred != NULL)
		crcowfree(td);
	if (td->td_limit != NULL)
		lim_free(td->td_limit);
}

void
thread_cow_update(struct thread *td)
{
	struct proc *p;
	struct ucred *oldcred;
	struct plimit *oldlimit;

	p = td->td_proc;
	oldlimit = NULL;
	PROC_LOCK(p);
	oldcred = crcowsync();
	if (td->td_limit != p->p_limit) {
		oldlimit = td->td_limit;
		td->td_limit = lim_hold(p->p_limit);
	}
	td->td_cowgen = p->p_cowgen;
	PROC_UNLOCK(p);
	if (oldcred != NULL)
		crfree(oldcred);
	if (oldlimit != NULL)
		lim_free(oldlimit);
}
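/*
 * The td_cowgen/p_cowgen pair implements lazy copy-on-write for the
 * per-process data cached in each thread (credentials and resource
 * limits): code installing a new p_ucred or p_limit bumps p_cowgen,
 * and a thread noticing td_cowgen != p_cowgen refreshes its cached
 * references through thread_cow_update(), typically on its way back
 * to user mode.
 */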
/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder.  This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	uint64_t runtime, new_switchtime;
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, td->td_name);
	SDT_PROBE0(proc, , , lwp__exit);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
	MPASS(td->td_realucred == td->td_ucred);

	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture-specific resources that
	 * would not be on a new untouched process.
	 */
	cpu_thread_exit(td);

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled.  Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all sign of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			atomic_add_int(&td->td_proc->p_exitthreads, 1);
			thread_unlink(td);
			td2 = FIRST_THREAD_IN_PROC(p);
			sched_exit_thread(td2, td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread.  P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_lock(p->p_singlethread);
					wakeup_swapper = thread_unsuspend_one(
					    p->p_singlethread, p, false);
					if (wakeup_swapper)
						kick_proc0();
				}
			}

			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting, but not through exit().
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	}
#ifdef HWPMC_HOOKS
	/*
	 * If this thread is part of a process that is being tracked by
	 * hwpmc(4), inform the module of the thread's impending exit.
	 */
	if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
	} else if (PMC_SYSTEM_SAMPLING_ACTIVE())
		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL);
#endif
	PROC_UNLOCK(p);
	PROC_STATLOCK(p);
	thread_lock(td);
	PROC_SUNLOCK(p);

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	VM_CNT_INC(v_swtch);

	/* Save our resource usage in our process. */
	td->td_ru.ru_nvcsw++;
	ruxagg_locked(p, td);
	rucollect(&p->p_ru, &td->td_ru);
	PROC_STATUNLOCK(p);

	td->td_state = TDS_INACTIVE;
#ifdef WITNESS
	witness_thread_exit(td);
#endif
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	sched_throw(td);
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
	KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
	td = FIRST_THREAD_IN_PROC(p);
	/* Lock the last thread so we spin until it exits cpu_throw(). */
	thread_lock(td);
	thread_unlock(td);
	lock_profile_thread_exit(td);
	cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_clean(td);
	thread_cow_free(td);
	callout_drain(&td->td_slpcallout);
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 */
void
thread_link(struct thread *td, struct proc *p)
{

	/*
	 * XXX This can't be enabled because it's called for proc0 before
	 * its lock has been created.
	 * PROC_LOCK_ASSERT(p, MA_OWNED);
	 */
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_flags = TDF_INMEM;

	LIST_INIT(&td->td_contested);
	LIST_INIT(&td->td_lprof[0]);
	LIST_INIT(&td->td_lprof[1]);
#ifdef EPOCH_TRACE
	SLIST_INIT(&td->td_epochs);
#endif
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
	p->p_numthreads++;
}

/*
 * Called from:
 *	thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_LOCK_ASSERT(p, MA_OWNED);
#ifdef EPOCH_TRACE
	MPASS(SLIST_EMPTY(&td->td_epochs));
#endif

	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	/* could clear a few other things here */
	/* Must NOT clear links to proc! */
}

static int
calc_remaining(struct proc *p, int mode)
{
	int remaining;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
		remaining = p->p_numthreads - p->p_suspcount;
	else
		panic("calc_remaining: wrong mode %d", mode);
	return (remaining);
}

static int
remain_for_mode(int mode)
{

	return (mode == SINGLE_ALLPROC ? 0 : 1);
}
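/*
 * calc_remaining() counts the threads that still have to park for the
 * given single-threading mode, while remain_for_mode() gives the target
 * the loop in thread_single() aims for: 1 for the intra-process modes
 * (the requesting thread itself never parks) and 0 for SINGLE_ALLPROC,
 * where the requestor belongs to a different process.
 */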
static int
weed_inhib(int mode, struct thread *td2, struct proc *p)
{
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td2, MA_OWNED);

	wakeup_swapper = 0;

	/*
	 * Since the thread lock is dropped by the scheduler we have
	 * to retry to check for races.
	 */
restart:
	switch (mode) {
	case SINGLE_EXIT:
		if (TD_IS_SUSPENDED(td2)) {
			wakeup_swapper |= thread_unsuspend_one(td2, p, true);
			thread_lock(td2);
			goto restart;
		}
		if (TD_CAN_ABORT(td2)) {
			wakeup_swapper |= sleepq_abort(td2, EINTR);
			return (wakeup_swapper);
		}
		break;
	case SINGLE_BOUNDARY:
	case SINGLE_NO_EXIT:
		if (TD_IS_SUSPENDED(td2) &&
		    (td2->td_flags & TDF_BOUNDARY) == 0) {
			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
			thread_lock(td2);
			goto restart;
		}
		if (TD_CAN_ABORT(td2)) {
			wakeup_swapper |= sleepq_abort(td2, ERESTART);
			return (wakeup_swapper);
		}
		break;
	case SINGLE_ALLPROC:
		/*
		 * ALLPROC suspend tries to avoid spurious EINTR for
		 * threads sleeping interruptibly, by suspending the
		 * thread directly, similarly to sig_suspend_threads().
		 * Since such a sleep is not performed at the user
		 * boundary, the TDF_BOUNDARY flag is not set, and
		 * TDF_ALLPROCSUSP is used to avoid an immediate
		 * un-suspend.
		 */
		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
		    TDF_ALLPROCSUSP)) == 0) {
			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
			thread_lock(td2);
			goto restart;
		}
		if (TD_CAN_ABORT(td2)) {
			if ((td2->td_flags & TDF_SBDRY) == 0) {
				thread_suspend_one(td2);
				td2->td_flags |= TDF_ALLPROCSUSP;
			} else {
				wakeup_swapper |= sleepq_abort(td2, ERESTART);
				return (wakeup_swapper);
			}
		}
		break;
	default:
		break;
	}
	thread_unlock(td2);
	return (wakeup_swapper);
}
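/*
 * Note on the retry pattern above: thread_unsuspend_one() hands the
 * thread to setrunnable(), which drops the thread lock, so weed_inhib()
 * re-acquires it and re-evaluates the inhibition state from scratch
 * rather than assuming the previous check still holds.
 */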
/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar).  Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single-threaded in the suspend mode when
 * there are no threads in user mode.  Threads in the kernel must be
 * allowed to continue until they get to the user boundary.  They may even
 * copy out their return values and data before suspending.  They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(struct proc *p, int mode)
{
	struct thread *td;
	struct thread *td2;
	int remaining, wakeup_swapper;

	td = curthread;
	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
	    ("invalid mode %d", mode));
	/*
	 * If allowing non-ALLPROC singlethreading for non-curproc
	 * callers, calc_remaining() and remain_for_mode() should be
	 * adjusted to also account for td->td_proc != p.  For now
	 * this is not implemented because it is not used.
	 */
	KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
	    (mode != SINGLE_ALLPROC && td->td_proc == p),
	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);

	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	if (mode == SINGLE_ALLPROC)
		p->p_flag |= P_TOTAL_STOP;
	p->p_flag |= P_STOPPED_SINGLE;
	PROC_SLOCK(p);
	p->p_singlethread = td;
	remaining = calc_remaining(p, mode);
	while (remaining != remain_for_mode(mode)) {
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		wakeup_swapper = 0;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
			if (TD_IS_INHIBITED(td2)) {
				wakeup_swapper |= weed_inhib(mode, td2, p);
#ifdef SMP
			} else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
				thread_unlock(td2);
#endif
			} else
				thread_unlock(td2);
		}
		if (wakeup_swapper)
			kick_proc0();
		remaining = calc_remaining(p, mode);

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == remain_for_mode(mode))
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_switch(td, p);
		remaining = calc_remaining(p, mode);
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * Convert the process to an unthreaded process.  The
		 * SINGLE_EXIT is called by exit1() or execve(), in
		 * both cases other threads must be retired.
		 */
		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);

		/*
		 * Wait for any remaining threads to exit cpu_throw().
		 */
		while (p->p_exitthreads != 0) {
			PROC_SUNLOCK(p);
			PROC_UNLOCK(p);
			sched_relinquish(td);
			PROC_LOCK(p);
			PROC_SLOCK(p);
		}
	} else if (mode == SINGLE_BOUNDARY) {
		/*
		 * Wait until all suspended threads are removed from
		 * the processors.  The thread_suspend_check()
		 * increments p_boundary_count while it is still
		 * running, which makes it possible for the execve()
		 * to destroy vmspace while our other threads are
		 * still using the address space.
		 *
		 * We lock the thread, which is only allowed to
		 * succeed after context switch code finished using
		 * the address space.
		 */
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
			    ("td %p not on boundary", td2));
			KASSERT(TD_IS_SUSPENDED(td2),
			    ("td %p is not suspended", td2));
			thread_unlock(td2);
		}
	}
	PROC_SUNLOCK(p);
	return (0);
}

bool
thread_suspend_check_needed(void)
{
	struct proc *p;
	struct thread *td;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
	    (td->td_dbgflags & TDB_SUSPEND) != 0));
}

/*
 * Called from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 *
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (thread_suspend_check_needed()) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading.  Single threader need not stop.
			 * It is safe to access p->p_singlethread unlocked
			 * because it can only be set to our address by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we go to the user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/*
		 * Ignore suspend requests if they are deferred.
		 */
		if ((td->td_flags & TDF_SBDRY) != 0) {
			KASSERT(return_instead,
			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
			KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
			    (TDF_SEINTR | TDF_SERESTART),
			    ("both TDF_SEINTR and TDF_SERESTART"));
			return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0);
		}

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			PROC_UNLOCK(p);

			/*
			 * Allow Linux emulation layer to do some work
			 * before thread suicide.
			 */
			if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
				(p->p_sysent->sv_thread_detach)(td);
			umtx_thread_exit(td);
			kern_thr_exit(td);
			panic("stopped thread did not exit");
		}

		PROC_SLOCK(p);
		thread_stopped(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount + 1) {
				thread_lock(p->p_singlethread);
				wakeup_swapper = thread_unsuspend_one(
				    p->p_singlethread, p, false);
				if (wakeup_swapper)
					kick_proc0();
			}
		}
		PROC_UNLOCK(p);
		thread_lock(td);
		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		PROC_SUNLOCK(p);
		mi_switch(SW_INVOL | SWT_SUSPEND);
		PROC_LOCK(p);
	}
	return (0);
}

/*
 * Check for possible stops and suspensions while executing a
 * casueword or similar transiently failing operation.
 *
 * The sleep argument controls whether the function can handle a stop
 * request itself or it should return ERESTART and the request is
 * processed at the kernel/user boundary in ast.
 *
 * Typically, when retrying due to casueword(9) failure (rv == 1), we
 * should handle the stop requests there, with exception of cases when
 * the thread owns a kernel resource, for instance busied the umtx
 * key, or when functions return immediately if thread_check_susp()
 * returned non-zero.
 * On the other hand, when retrying the whole lock operation, it is
 * better not to stop there but to delegate the handling to ast.
 *
 * If the request is for thread termination (P_SINGLE_EXIT), we cannot
 * handle it at all, and simply return EINTR.
 */
int
thread_check_susp(struct thread *td, bool sleep)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (p->p_flag & P_SINGLE_EXIT)
		error = EINTR;
	else if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND)))
		error = sleep ? thread_suspend_check(0) : ERESTART;
	PROC_UNLOCK(p);
	return (error);
}

void
thread_suspend_switch(struct thread *td, struct proc *p)
{

	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * We implement thread_suspend_one in stages here to avoid
	 * dropping the proc lock while the thread lock is owned.
	 */
	if (p == td->td_proc) {
		thread_stopped(p);
		p->p_suspcount++;
	}
	PROC_UNLOCK(p);
	thread_lock(td);
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
	PROC_SUNLOCK(p);
	DROP_GIANT();
	mi_switch(SW_VOL | SWT_SUSPEND);
	PICKUP_GIANT();
	PROC_LOCK(p);
	PROC_SLOCK(p);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
}

static int
thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
	TD_CLR_SUSPENDED(td);
	td->td_flags &= ~TDF_ALLPROCSUSP;
	if (td->td_proc == p) {
		PROC_SLOCK_ASSERT(p, MA_OWNED);
		p->p_suspcount--;
		if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
			td->td_flags &= ~TDF_BOUNDARY;
			p->p_boundary_count--;
		}
	}
	return (setrunnable(td, 0));
}
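/*
 * Invariant kept by the suspend/unsuspend helpers above: every increment
 * of p_suspcount in thread_suspend_one() or thread_suspend_switch() is
 * matched by a decrement in thread_unsuspend_one(), and TDF_BOUNDARY is
 * only cleared together with its p_boundary_count reference when the
 * caller asks for it with boundary == true.
 */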
/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	wakeup_swapper = 0;
	if (!P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td, p,
				    true);
			} else
				thread_unlock(td);
		}
	} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
	    p->p_numthreads == p->p_suspcount) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request.  Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		if (p->p_singlethread->td_proc == p) {
			thread_lock(p->p_singlethread);
			wakeup_swapper = thread_unsuspend_one(
			    p->p_singlethread, p, false);
		}
	}
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * End the single threading mode.
 */
void
thread_single_end(struct proc *p, int mode)
{
	struct thread *td;
	int wakeup_swapper;

	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
	    ("invalid mode %d", mode));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
	    (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
	    ("mode %d does not match P_TOTAL_STOP", mode));
	KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
	    ("thread_single_end from other thread %p %p",
	    curthread, p->p_singlethread));
	KASSERT(mode != SINGLE_BOUNDARY ||
	    (p->p_flag & P_SINGLE_BOUNDARY) != 0,
	    ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
	    P_TOTAL_STOP);
	PROC_SLOCK(p);
	p->p_singlethread = NULL;
	wakeup_swapper = 0;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process.  The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td, p,
				    mode == SINGLE_BOUNDARY);
			} else
				thread_unlock(td);
		}
	}
	KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
	    ("inconsistent boundary count %d", p->p_boundary_count));
	PROC_SUNLOCK(p);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Locate a thread by number and return with proc lock held.
 *
 * thread exit establishes proc -> tidhash lock ordering, but lookup
 * takes tidhash first and needs to return locked proc.
 *
 * The problem is worked around by relying on type-safety of both
 * structures and doing the work in 2 steps:
 * - tidhash-locked lookup which saves both thread and proc pointers
 * - proc-locked verification that the found thread still matches
 */
static bool
tdfind_hash(lwpid_t tid, pid_t pid, struct proc **pp, struct thread **tdp)
{
#define RUN_THRESH	16
	struct proc *p;
	struct thread *td;
	int run;
	bool locked;

	run = 0;
	rw_rlock(TIDHASHLOCK(tid));
	locked = true;
	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
		if (td->td_tid != tid) {
			run++;
			continue;
		}
		p = td->td_proc;
		if (pid != -1 && p->p_pid != pid) {
			td = NULL;
			break;
		}
		if (run > RUN_THRESH) {
			if (rw_try_upgrade(TIDHASHLOCK(tid))) {
				LIST_REMOVE(td, td_hash);
				LIST_INSERT_HEAD(TIDHASH(td->td_tid),
				    td, td_hash);
				rw_wunlock(TIDHASHLOCK(tid));
				locked = false;
				break;
			}
		}
		break;
	}
	if (locked)
		rw_runlock(TIDHASHLOCK(tid));
	if (td == NULL)
		return (false);
	*pp = p;
	*tdp = td;
	return (true);
}

struct thread *
tdfind(lwpid_t tid, pid_t pid)
{
	struct proc *p;
	struct thread *td;

	td = curthread;
	if (td->td_tid == tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	}

	for (;;) {
		if (!tdfind_hash(tid, pid, &p, &td))
			return (NULL);
		PROC_LOCK(p);
		if (td->td_tid != tid) {
			PROC_UNLOCK(p);
			continue;
		}
		if (td->td_proc != p) {
			PROC_UNLOCK(p);
			continue;
		}
		if (p->p_state == PRS_NEW) {
			PROC_UNLOCK(p);
			return (NULL);
		}
		return (td);
	}
}

void
tidhash_add(struct thread *td)
{
	rw_wlock(TIDHASHLOCK(td->td_tid));
	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
	rw_wunlock(TIDHASHLOCK(td->td_tid));
}

void
tidhash_remove(struct thread *td)
{

	rw_wlock(TIDHASHLOCK(td->td_tid));
	LIST_REMOVE(td, td_hash);
	rw_wunlock(TIDHASHLOCK(td->td_tid));
}