/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

int virtual_cpu;

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Thread ID allocator.  The allocator keeps track of assigned IDs by
 * using a bitmap.  The bitmap is created in parts.  The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	lwpid_t		bmp_base;
	int		bmp_free;
};

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);
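/*
 * Worked example of the ID layout above (assuming a 64-bit tid_bitmap_word,
 * so that TID_IDS_PER_IDX is 64 and TID_BITMAP_SIZE is 1024 / 64 = 16):
 * a thread ID is reconstructed from its bitmap position as
 *
 *	tid = bmp_base + idx * TID_IDS_PER_IDX + bit
 *
 * With the stock PID_MAX of 99999 the first part covers IDs
 * 100000..101023, and bit 5 of bmp_bitmap[2] in that part corresponds to
 * ID 100000 + 2 * 64 + 5 = 100133.  thread_fini() below inverts the
 * mapping:
 *
 *	idx = (tid - bmp_base) / TID_IDS_PER_IDX;
 *	bit = (tid - bmp_base) % TID_IDS_PER_IDX;
 */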
/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.  A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time.  This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
	sched_newthread(td);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;
	struct tid_bitmap_part *bmp, *new;
	int bit, idx;

	td = (struct thread *)mem;

	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);
	if (new != NULL)
		uma_zfree(tid_zone, new);

	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
	sched_newthread(td);
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	lwpid_t tid;
	int bit, idx;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);

	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (td->td_tid >= bmp->bmp_base &&
		    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
			break;
	}
	KASSERT(bmp != NULL, ("No TID bitmap?"));
	mtx_lock(&tid_lock);
	tid = td->td_tid - bmp->bmp_base;
	idx = tid / TID_IDS_PER_IDX;
	bit = 1UL << (tid % TID_IDS_PER_IDX);
	bmp->bmp_bitmap[idx] |= bit;
	bmp->bmp_free++;
	mtx_unlock(&tid_lock);
}
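/*
 * A note on the split between the hooks above: thread_init()/thread_fini()
 * run only when an item enters or leaves the zone's backing storage, while
 * thread_ctor()/thread_dtor() run on every allocation and free.  The TID
 * and the kernel stack therefore stay attached to a cached thread structure
 * between uses, which is the point of calling those parts "type-stable".
 */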
/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static int
ksegrp_ctor(void *mem, int size, void *arg, int flags)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	bzero(mem, size);
	kg->kg_sched = (struct kg_sched *)&kg[1];
	return (0);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

/*
 * Called from:
 *  thread_exit()
 */
void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE.
	 */
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 * Called from:
 *  {arch}/{arch}/machdep.c  ia64_init(), init386() etc.
 *  proc_dtor() (should go away)
 *  proc_init()
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	thread_link(td, kg);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    ksegrp_ctor, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	kseinit();	/* set up kse specific stuff, e.g. upcall zone */
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}
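/*
 * How the stash/reap pair is meant to be used (a summary of the code in
 * this file, not a new interface): a context that cannot free a thread or
 * ksegrp directly, for example because it is still running on that
 * thread's stack, parks it on one of the zombie queues above.  The storage
 * is reclaimed later by thread_reap(), which is called from thread_alloc()
 * and thread_wait() when it is known to be safe.
 */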
/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		/*
		 * There will always be a thread on the list if one of these
		 * is there.
		 */
		kse_GC();
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}
/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().  This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1 does.
 */
/* XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode I guess. Because we do this (cpu_throw) unconditionally
 * here, they have their own version of it (thr_exit1())
 * that doesn't do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1()
 * anyhow.  After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 *
 * Called from:
 *  exit1()
 *  kse_exit()
 *  thr_exit()
 *  thread_user_enter()
 *  thread_userret()
 *  thread_suspend_check()
 */
void
thread_exit(void)
{
	struct thread *td;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, p->p_comm);

	if (td->td_standin != NULL) {
		/*
		 * Note that we don't need to free the cred here as it
		 * is done in thread_reap().
		 */
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture specific resources that
	 * would not be on a new untouched process.
	 */
	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The thread is exiting.  The scheduler can release its stuff
	 * and collect stats etc.
	 */
	sched_thread_exit(td);

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all sign of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			thread_unlink(td);

			/* XXX first arg not used in 4BSD or ULE */
			sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);

			/*
			 * As we are exiting there is room for another
			 * to be created.
			 */
			if (p->p_maxthrwaits)
				wakeup(&p->p_numthreads);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread. P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_unsuspend_one(p->p_singlethread);
				}
			}

			/*
			 * Because each upcall structure has an owner thread,
			 * and that owner exits only when the process is in
			 * the exiting state, an upcall to userland is no
			 * longer needed and it is safe to delete the upcall
			 * structure here.  When all threads in a group have
			 * exited, all upcalls in the group should thus be
			 * automatically freed.
			 * XXXKSE This is a KSE thing and should be exported
			 * there somehow.
			 */
			upcall_remove(td);

			/*
			 * If the thread we unlinked above was the last one,
			 * then this ksegrp should go away too.
			 */
			if (kg->kg_numthreads == 0) {
				/*
				 * Let the scheduler know about this in case
				 * it needs to recover stats or resources.
				 * Theoretically we could let
				 * sched_exit_ksegrp() do the equivalent of
				 * setting the concurrency to 0,
				 * but don't do it yet to avoid changing
				 * the existing scheduler code until we
				 * are ready.
				 * We supply a random other ksegrp
				 * as the recipient of any built up
				 * cpu usage etc. (if the scheduler wants it).
				 * XXXKSE
				 * This is probably not fair so think of
				 * a better answer.
				 */
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
				sched_set_concurrency(kg, 0); /* XXX TEMP */
				ksegrp_unlink(kg);
				ksegrp_stash(kg);
			}
			PROC_UNLOCK(p);
			td->td_ksegrp = NULL;
			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting.. but not through exit().
			 * What should we do?
			 * Theoretically this can't happen:
			 *  exit1() - clears threading flags before coming here
			 *  kse_exit() - treats last thread specially
			 *  thr_exit() - treats last thread specially
			 *  thread_user_enter() - only if more exist
			 *  thread_userret() - only if more exist
			 *  thread_suspend_check() - only if more exist
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	} else {
		/*
		 * A non-threaded process comes here.
		 * This includes an ex-threaded process that is coming
		 * here via exit1() (exit1() dethreads the proc first).
		 */
		PROC_UNLOCK(p);
	}
	td->td_state = TDS_INACTIVE;
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}
/*
 * Do any thread specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			crfree(td->td_standin->td_ucred);
			td->td_standin->td_ucred = NULL;
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
		crfree(td->td_ucred);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 * Called from:
 *  proc_linkup()
 *  thread_schedule_upcall()
 *  thr_create()
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_flags = 0;
	td->td_kflags = 0;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

/*
 * Convert a process with one thread to an unthreaded process.
 * Called from:
 *  thread_single(exit)  (called from execve and exit)
 *  kse_exit() XXX may need cleaning up wrt KSE stuff
 */
void
thread_unthread(struct thread *td)
{
	struct proc *p = td->td_proc;

	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
	upcall_remove(td);
	p->p_flag &= ~(P_SA|P_HADTHREADS);
	td->td_mailbox = NULL;
	td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND);
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}
	sched_set_concurrency(td->td_ksegrp, 1);
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* Could clear a few other things here, */
	/* but must NOT clear links to proc and ksegrp! */
}
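/*
 * An illustrative sketch of how the single-threading interface below is
 * meant to be used (a sketch only, not copied from any particular caller):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_EXIT))
 *		...	(someone else won the race; caller must back out)
 *	else
 *		...	(all other threads are gone; process is unthreaded)
 *	PROC_UNLOCK(p);
 *
 * With SINGLE_EXIT, thread_single() itself calls thread_unthread() and
 * clears P_STOPPED_SINGLE, so no matching thread_single_end() is needed.
 * Requesters using the other modes call thread_single_end() once they are
 * finished operating on the quiescent process.
 */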
/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may
 * however be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int mode)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_HADTHREADS) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				switch (mode) {
				case SINGLE_EXIT:
					if (td->td_flags & TDF_DBSUSPEND)
						td->td_flags &= ~TDF_DBSUSPEND;
					if (TD_IS_SUSPENDED(td2))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2);
					break;
				case SINGLE_BOUNDARY:
					if (TD_IS_SUSPENDED(td2) &&
					    !(td2->td_flags & TDF_BOUNDARY))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2);
					break;
				default:
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 */
					if ((td2->td_flags & TDF_SINTR) &&
					    (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED)))
						thread_suspend_one(td2);
					break;
				}
			}
		}
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * We have gotten rid of all the other threads and we
		 * are about to either exit or exec. In either case,
		 * we try our utmost to revert to being a non-threaded
		 * process.
		 */
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
		thread_unthread(td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}
/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0 | return_instead != 0
 *---------------+---------------------+---------------------
 *       0       | returns 0           | returns 0 or 1
 *               | when ST ends        | immediately
 *---------------+---------------------+---------------------
 *       1       | thread exits        | returns 1
 *               |                     | immediately
 *
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (1);

		/* Should we go to the user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			thread_exit();

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
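		/*
		 * If we may not back out (return_instead == 0, e.g. we came
		 * in from userret()), advertise ourselves as parked at the
		 * user boundary while we sleep so that a SINGLE_BOUNDARY
		 * requester in thread_single() does not keep waiting for us;
		 * the accounting is undone once mi_switch() returns.
		 */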
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount)
				thread_unsuspend_one(p->p_singlethread);
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL, NULL);
		if (return_instead == 0) {
			p->p_boundary_count--;
			td->td_flags &= ~TDF_BOUNDARY;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

/*
 * End the single threading mode.
 */
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Called before going into an interruptible sleep to see if we have been
 * interrupted or requested to exit.
 */
int
thread_sleep_check(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	mtx_assert(&sched_lock, MA_OWNED);
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_singlethread != td) {
			if (p->p_flag & P_SINGLE_EXIT)
				return (EINTR);
			if (p->p_flag & P_SINGLE_BOUNDARY)
				return (ERESTART);
		}
		if (td->td_flags & TDF_INTERRUPT)
			return (td->td_intrval);
	}
	return (0);
}