1 /* 2 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice(s), this list of conditions and the following disclaimer as 10 * the first lines of this file unmodified other than the possible 11 * addition of one or more copyright notices. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice(s), this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 26 * DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/lock.h> 36 #include <sys/mutex.h> 37 #include <sys/proc.h> 38 #include <sys/smp.h> 39 #include <sys/sysctl.h> 40 #include <sys/sched.h> 41 #include <sys/sleepqueue.h> 42 #include <sys/turnstile.h> 43 #include <sys/ktr.h> 44 45 #include <vm/vm.h> 46 #include <vm/vm_extern.h> 47 #include <vm/uma.h> 48 49 /* 50 * KSEGRP related storage. 51 */ 52 static uma_zone_t ksegrp_zone; 53 static uma_zone_t thread_zone; 54 55 /* DEBUG ONLY */ 56 SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation"); 57 static int thread_debug = 0; 58 SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW, 59 &thread_debug, 0, "thread debug"); 60 61 int max_threads_per_proc = 1500; 62 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW, 63 &max_threads_per_proc, 0, "Limit on threads per proc"); 64 65 int max_groups_per_proc = 1500; 66 SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW, 67 &max_groups_per_proc, 0, "Limit on thread groups per proc"); 68 69 int max_threads_hits; 70 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD, 71 &max_threads_hits, 0, ""); 72 73 int virtual_cpu; 74 75 #define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) 76 77 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads); 78 TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps); 79 struct mtx kse_zombie_lock; 80 MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN); 81 82 static int 83 sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS) 84 { 85 int error, new_val; 86 int def_val; 87 88 def_val = mp_ncpus; 89 if (virtual_cpu == 0) 90 new_val = def_val; 91 else 92 new_val = virtual_cpu; 93 error = sysctl_handle_int(oidp, &new_val, 0, req); 94 if (error != 0 || req->newptr == NULL) 95 return (error); 96 if (new_val < 0) 97 return (EINVAL); 98 virtual_cpu = new_val; 99 return (0); 100 } 101 102 /* DEBUG ONLY */ 103 SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW, 104 0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I", 105 "debug virtual cpus"); 106 107 /* 108 * Thread ID allocator. The allocator keeps track of assigned IDs by 109 * using a bitmap. The bitmap is created in parts. The parts are linked 110 * together. 111 */ 112 typedef u_long tid_bitmap_word; 113 114 #define TID_IDS_PER_PART 1024 115 #define TID_IDS_PER_IDX (sizeof(tid_bitmap_word) << 3) 116 #define TID_BITMAP_SIZE (TID_IDS_PER_PART / TID_IDS_PER_IDX) 117 #define TID_MIN (PID_MAX + 1) 118 119 struct tid_bitmap_part { 120 STAILQ_ENTRY(tid_bitmap_part) bmp_next; 121 tid_bitmap_word bmp_bitmap[TID_BITMAP_SIZE]; 122 lwpid_t bmp_base; 123 int bmp_free; 124 }; 125 126 static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap = 127 STAILQ_HEAD_INITIALIZER(tid_bitmap); 128 static uma_zone_t tid_zone; 129 130 struct mtx tid_lock; 131 MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF); 132 133 /* 134 * Prepare a thread for use. 135 */ 136 static int 137 thread_ctor(void *mem, int size, void *arg, int flags) 138 { 139 struct thread *td; 140 141 td = (struct thread *)mem; 142 td->td_state = TDS_INACTIVE; 143 td->td_oncpu = NOCPU; 144 145 /* 146 * Note that td_critnest begins life as 1 because the thread is not 147 * running and is thereby implicitly waiting to be on the receiving 148 * end of a context switch. A context switch must occur inside a 149 * critical section, and in fact, includes hand-off of the sched_lock. 150 * After a context switch to a newly created thread, it will release 151 * sched_lock for the first time, and its td_critnest will hit 0 for 152 * the first time. This happens on the far end of a context switch, 153 * and when it context switches away from itself, it will in fact go 154 * back into a critical section, and hand off the sched lock to the 155 * next thread. 156 */ 157 td->td_critnest = 1; 158 return (0); 159 } 160 161 /* 162 * Reclaim a thread after use. 163 */ 164 static void 165 thread_dtor(void *mem, int size, void *arg) 166 { 167 struct thread *td; 168 169 td = (struct thread *)mem; 170 171 #ifdef INVARIANTS 172 /* Verify that this thread is in a safe state to free. */ 173 switch (td->td_state) { 174 case TDS_INHIBITED: 175 case TDS_RUNNING: 176 case TDS_CAN_RUN: 177 case TDS_RUNQ: 178 /* 179 * We must never unlink a thread that is in one of 180 * these states, because it is currently active. 181 */ 182 panic("bad state for thread unlinking"); 183 /* NOTREACHED */ 184 case TDS_INACTIVE: 185 break; 186 default: 187 panic("bad thread state"); 188 /* NOTREACHED */ 189 } 190 #endif 191 sched_newthread(td); 192 } 193 194 /* 195 * Initialize type-stable parts of a thread (when newly created). 196 */ 197 static int 198 thread_init(void *mem, int size, int flags) 199 { 200 struct thread *td; 201 struct tid_bitmap_part *bmp, *new; 202 int bit, idx; 203 204 td = (struct thread *)mem; 205 206 mtx_lock(&tid_lock); 207 STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) { 208 if (bmp->bmp_free) 209 break; 210 } 211 /* Create a new bitmap if we run out of free bits. */ 212 if (bmp == NULL) { 213 mtx_unlock(&tid_lock); 214 new = uma_zalloc(tid_zone, M_WAITOK); 215 mtx_lock(&tid_lock); 216 bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next); 217 if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) { 218 /* 1=free, 0=assigned. This way we can use ffsl(). */ 219 memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap)); 220 new->bmp_base = (bmp == NULL) ? TID_MIN : 221 bmp->bmp_base + TID_IDS_PER_PART; 222 new->bmp_free = TID_IDS_PER_PART; 223 STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next); 224 bmp = new; 225 new = NULL; 226 } 227 } else 228 new = NULL; 229 /* We have a bitmap with available IDs. */ 230 idx = 0; 231 while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL) 232 idx++; 233 bit = ffsl(bmp->bmp_bitmap[idx]) - 1; 234 td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit; 235 bmp->bmp_bitmap[idx] &= ~(1UL << bit); 236 bmp->bmp_free--; 237 mtx_unlock(&tid_lock); 238 if (new != NULL) 239 uma_zfree(tid_zone, new); 240 241 vm_thread_new(td, 0); 242 cpu_thread_setup(td); 243 td->td_sleepqueue = sleepq_alloc(); 244 td->td_turnstile = turnstile_alloc(); 245 td->td_sched = (struct td_sched *)&td[1]; 246 sched_newthread(td); 247 return (0); 248 } 249 250 /* 251 * Tear down type-stable parts of a thread (just before being discarded). 252 */ 253 static void 254 thread_fini(void *mem, int size) 255 { 256 struct thread *td; 257 struct tid_bitmap_part *bmp; 258 lwpid_t tid; 259 int bit, idx; 260 261 td = (struct thread *)mem; 262 turnstile_free(td->td_turnstile); 263 sleepq_free(td->td_sleepqueue); 264 vm_thread_dispose(td); 265 266 STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) { 267 if (td->td_tid >= bmp->bmp_base && 268 td->td_tid < bmp->bmp_base + TID_IDS_PER_PART) 269 break; 270 } 271 KASSERT(bmp != NULL, ("No TID bitmap?")); 272 mtx_lock(&tid_lock); 273 tid = td->td_tid - bmp->bmp_base; 274 idx = tid / TID_IDS_PER_IDX; 275 bit = 1UL << (tid % TID_IDS_PER_IDX); 276 bmp->bmp_bitmap[idx] |= bit; 277 bmp->bmp_free++; 278 mtx_unlock(&tid_lock); 279 } 280 281 /* 282 * Initialize type-stable parts of a ksegrp (when newly created). 283 */ 284 static int 285 ksegrp_ctor(void *mem, int size, void *arg, int flags) 286 { 287 struct ksegrp *kg; 288 289 kg = (struct ksegrp *)mem; 290 bzero(mem, size); 291 kg->kg_sched = (struct kg_sched *)&kg[1]; 292 return (0); 293 } 294 295 void 296 ksegrp_link(struct ksegrp *kg, struct proc *p) 297 { 298 299 TAILQ_INIT(&kg->kg_threads); 300 TAILQ_INIT(&kg->kg_runq); /* links with td_runq */ 301 TAILQ_INIT(&kg->kg_slpq); /* links with td_runq */ 302 TAILQ_INIT(&kg->kg_upcalls); /* all upcall structure in ksegrp */ 303 kg->kg_proc = p; 304 /* 305 * the following counters are in the -zero- section 306 * and may not need clearing 307 */ 308 kg->kg_numthreads = 0; 309 kg->kg_runnable = 0; 310 kg->kg_numupcalls = 0; 311 /* link it in now that it's consistent */ 312 p->p_numksegrps++; 313 TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp); 314 } 315 316 /* 317 * Called from: 318 * thread-exit() 319 */ 320 void 321 ksegrp_unlink(struct ksegrp *kg) 322 { 323 struct proc *p; 324 325 mtx_assert(&sched_lock, MA_OWNED); 326 KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads")); 327 KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls")); 328 329 p = kg->kg_proc; 330 TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp); 331 p->p_numksegrps--; 332 /* 333 * Aggregate stats from the KSE 334 */ 335 } 336 337 /* 338 * For a newly created process, 339 * link up all the structures and its initial threads etc. 340 * called from: 341 * {arch}/{arch}/machdep.c ia64_init(), init386() etc. 342 * proc_dtor() (should go away) 343 * proc_init() 344 */ 345 void 346 proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td) 347 { 348 349 TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */ 350 TAILQ_INIT(&p->p_threads); /* all threads in proc */ 351 TAILQ_INIT(&p->p_suspended); /* Threads suspended */ 352 p->p_numksegrps = 0; 353 p->p_numthreads = 0; 354 355 ksegrp_link(kg, p); 356 thread_link(td, kg); 357 } 358 359 /* 360 * Initialize global thread allocation resources. 361 */ 362 void 363 threadinit(void) 364 { 365 366 thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(), 367 thread_ctor, thread_dtor, thread_init, thread_fini, 368 UMA_ALIGN_CACHE, 0); 369 tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part), 370 NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); 371 ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(), 372 ksegrp_ctor, NULL, NULL, NULL, 373 UMA_ALIGN_CACHE, 0); 374 kseinit(); /* set up kse specific stuff e.g. upcall zone*/ 375 } 376 377 /* 378 * Stash an embarasingly extra thread into the zombie thread queue. 379 */ 380 void 381 thread_stash(struct thread *td) 382 { 383 mtx_lock_spin(&kse_zombie_lock); 384 TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq); 385 mtx_unlock_spin(&kse_zombie_lock); 386 } 387 388 /* 389 * Stash an embarasingly extra ksegrp into the zombie ksegrp queue. 390 */ 391 void 392 ksegrp_stash(struct ksegrp *kg) 393 { 394 mtx_lock_spin(&kse_zombie_lock); 395 TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp); 396 mtx_unlock_spin(&kse_zombie_lock); 397 } 398 399 /* 400 * Reap zombie kse resource. 401 */ 402 void 403 thread_reap(void) 404 { 405 struct thread *td_first, *td_next; 406 struct ksegrp *kg_first, * kg_next; 407 408 /* 409 * Don't even bother to lock if none at this instant, 410 * we really don't care about the next instant.. 411 */ 412 if ((!TAILQ_EMPTY(&zombie_threads)) 413 || (!TAILQ_EMPTY(&zombie_ksegrps))) { 414 mtx_lock_spin(&kse_zombie_lock); 415 td_first = TAILQ_FIRST(&zombie_threads); 416 kg_first = TAILQ_FIRST(&zombie_ksegrps); 417 if (td_first) 418 TAILQ_INIT(&zombie_threads); 419 if (kg_first) 420 TAILQ_INIT(&zombie_ksegrps); 421 mtx_unlock_spin(&kse_zombie_lock); 422 while (td_first) { 423 td_next = TAILQ_NEXT(td_first, td_runq); 424 if (td_first->td_ucred) 425 crfree(td_first->td_ucred); 426 thread_free(td_first); 427 td_first = td_next; 428 } 429 while (kg_first) { 430 kg_next = TAILQ_NEXT(kg_first, kg_ksegrp); 431 ksegrp_free(kg_first); 432 kg_first = kg_next; 433 } 434 /* 435 * there will always be a thread on the list if one of these 436 * is there. 437 */ 438 kse_GC(); 439 } 440 } 441 442 /* 443 * Allocate a ksegrp. 444 */ 445 struct ksegrp * 446 ksegrp_alloc(void) 447 { 448 return (uma_zalloc(ksegrp_zone, M_WAITOK)); 449 } 450 451 /* 452 * Allocate a thread. 453 */ 454 struct thread * 455 thread_alloc(void) 456 { 457 thread_reap(); /* check if any zombies to get */ 458 return (uma_zalloc(thread_zone, M_WAITOK)); 459 } 460 461 /* 462 * Deallocate a ksegrp. 463 */ 464 void 465 ksegrp_free(struct ksegrp *td) 466 { 467 uma_zfree(ksegrp_zone, td); 468 } 469 470 /* 471 * Deallocate a thread. 472 */ 473 void 474 thread_free(struct thread *td) 475 { 476 477 cpu_thread_clean(td); 478 uma_zfree(thread_zone, td); 479 } 480 481 /* 482 * Discard the current thread and exit from its context. 483 * Always called with scheduler locked. 484 * 485 * Because we can't free a thread while we're operating under its context, 486 * push the current thread into our CPU's deadthread holder. This means 487 * we needn't worry about someone else grabbing our context before we 488 * do a cpu_throw(). This may not be needed now as we are under schedlock. 489 * Maybe we can just do a thread_stash() as thr_exit1 does. 490 */ 491 /* XXX 492 * libthr expects its thread exit to return for the last 493 * thread, meaning that the program is back to non-threaded 494 * mode I guess. Because we do this (cpu_throw) unconditionally 495 * here, they have their own version of it. (thr_exit1()) 496 * that doesn't do it all if this was the last thread. 497 * It is also called from thread_suspend_check(). 498 * Of course in the end, they end up coming here through exit1 499 * anyhow.. After fixing 'thr' to play by the rules we should be able 500 * to merge these two functions together. 501 * 502 * called from: 503 * exit1() 504 * kse_exit() 505 * thr_exit() 506 * thread_user_enter() 507 * thread_userret() 508 * thread_suspend_check() 509 */ 510 void 511 thread_exit(void) 512 { 513 struct thread *td; 514 struct proc *p; 515 struct ksegrp *kg; 516 517 td = curthread; 518 kg = td->td_ksegrp; 519 p = td->td_proc; 520 521 mtx_assert(&sched_lock, MA_OWNED); 522 mtx_assert(&Giant, MA_NOTOWNED); 523 PROC_LOCK_ASSERT(p, MA_OWNED); 524 KASSERT(p != NULL, ("thread exiting without a process")); 525 KASSERT(kg != NULL, ("thread exiting without a kse group")); 526 CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td, 527 (long)p->p_pid, p->p_comm); 528 529 if (td->td_standin != NULL) { 530 /* 531 * Note that we don't need to free the cred here as it 532 * is done in thread_reap(). 533 */ 534 thread_stash(td->td_standin); 535 td->td_standin = NULL; 536 } 537 538 /* 539 * drop FPU & debug register state storage, or any other 540 * architecture specific resources that 541 * would not be on a new untouched process. 542 */ 543 cpu_thread_exit(td); /* XXXSMP */ 544 545 /* 546 * The thread is exiting. scheduler can release its stuff 547 * and collect stats etc. 548 */ 549 sched_thread_exit(td); 550 551 /* 552 * The last thread is left attached to the process 553 * So that the whole bundle gets recycled. Skip 554 * all this stuff if we never had threads. 555 * EXIT clears all sign of other threads when 556 * it goes to single threading, so the last thread always 557 * takes the short path. 558 */ 559 if (p->p_flag & P_HADTHREADS) { 560 if (p->p_numthreads > 1) { 561 thread_unlink(td); 562 563 /* XXX first arg not used in 4BSD or ULE */ 564 sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); 565 566 /* 567 * as we are exiting there is room for another 568 * to be created. 569 */ 570 if (p->p_maxthrwaits) 571 wakeup(&p->p_numthreads); 572 573 /* 574 * The test below is NOT true if we are the 575 * sole exiting thread. P_STOPPED_SNGL is unset 576 * in exit1() after it is the only survivor. 577 */ 578 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 579 if (p->p_numthreads == p->p_suspcount) { 580 thread_unsuspend_one(p->p_singlethread); 581 } 582 } 583 584 /* 585 * Because each upcall structure has an owner thread, 586 * owner thread exits only when process is in exiting 587 * state, so upcall to userland is no longer needed, 588 * deleting upcall structure is safe here. 589 * So when all threads in a group is exited, all upcalls 590 * in the group should be automatically freed. 591 * XXXKSE This is a KSE thing and should be exported 592 * there somehow. 593 */ 594 upcall_remove(td); 595 596 /* 597 * If the thread we unlinked above was the last one, 598 * then this ksegrp should go away too. 599 */ 600 if (kg->kg_numthreads == 0) { 601 /* 602 * let the scheduler know about this in case 603 * it needs to recover stats or resources. 604 * Theoretically we could let 605 * sched_exit_ksegrp() do the equivalent of 606 * setting the concurrency to 0 607 * but don't do it yet to avoid changing 608 * the existing scheduler code until we 609 * are ready. 610 * We supply a random other ksegrp 611 * as the recipient of any built up 612 * cpu usage etc. (If the scheduler wants it). 613 * XXXKSE 614 * This is probably not fair so think of 615 * a better answer. 616 */ 617 sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td); 618 sched_set_concurrency(kg, 0); /* XXX TEMP */ 619 ksegrp_unlink(kg); 620 ksegrp_stash(kg); 621 } 622 PROC_UNLOCK(p); 623 td->td_ksegrp = NULL; 624 PCPU_SET(deadthread, td); 625 } else { 626 /* 627 * The last thread is exiting.. but not through exit() 628 * what should we do? 629 * Theoretically this can't happen 630 * exit1() - clears threading flags before coming here 631 * kse_exit() - treats last thread specially 632 * thr_exit() - treats last thread specially 633 * thread_user_enter() - only if more exist 634 * thread_userret() - only if more exist 635 * thread_suspend_check() - only if more exist 636 */ 637 panic ("thread_exit: Last thread exiting on its own"); 638 } 639 } else { 640 /* 641 * non threaded process comes here. 642 * This includes an EX threaded process that is coming 643 * here via exit1(). (exit1 dethreads the proc first). 644 */ 645 PROC_UNLOCK(p); 646 } 647 td->td_state = TDS_INACTIVE; 648 CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td); 649 cpu_throw(td, choosethread()); 650 panic("I'm a teapot!"); 651 /* NOTREACHED */ 652 } 653 654 /* 655 * Do any thread specific cleanups that may be needed in wait() 656 * called with Giant, proc and schedlock not held. 657 */ 658 void 659 thread_wait(struct proc *p) 660 { 661 struct thread *td; 662 663 mtx_assert(&Giant, MA_NOTOWNED); 664 KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()")); 665 KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()")); 666 FOREACH_THREAD_IN_PROC(p, td) { 667 if (td->td_standin != NULL) { 668 crfree(td->td_ucred); 669 td->td_ucred = NULL; 670 thread_free(td->td_standin); 671 td->td_standin = NULL; 672 } 673 cpu_thread_clean(td); 674 crfree(td->td_ucred); 675 } 676 thread_reap(); /* check for zombie threads etc. */ 677 } 678 679 /* 680 * Link a thread to a process. 681 * set up anything that needs to be initialized for it to 682 * be used by the process. 683 * 684 * Note that we do not link to the proc's ucred here. 685 * The thread is linked as if running but no KSE assigned. 686 * Called from: 687 * proc_linkup() 688 * thread_schedule_upcall() 689 * thr_create() 690 */ 691 void 692 thread_link(struct thread *td, struct ksegrp *kg) 693 { 694 struct proc *p; 695 696 p = kg->kg_proc; 697 td->td_state = TDS_INACTIVE; 698 td->td_proc = p; 699 td->td_ksegrp = kg; 700 td->td_flags = 0; 701 td->td_kflags = 0; 702 703 LIST_INIT(&td->td_contested); 704 callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); 705 TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist); 706 TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist); 707 p->p_numthreads++; 708 kg->kg_numthreads++; 709 } 710 711 /* 712 * Convert a process with one thread to an unthreaded process. 713 * Called from: 714 * thread_single(exit) (called from execve and exit) 715 * kse_exit() XXX may need cleaning up wrt KSE stuff 716 */ 717 void 718 thread_unthread(struct thread *td) 719 { 720 struct proc *p = td->td_proc; 721 722 KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads")); 723 upcall_remove(td); 724 p->p_flag &= ~(P_SA|P_HADTHREADS); 725 td->td_mailbox = NULL; 726 td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND); 727 if (td->td_standin != NULL) { 728 thread_stash(td->td_standin); 729 td->td_standin = NULL; 730 } 731 sched_set_concurrency(td->td_ksegrp, 1); 732 } 733 734 /* 735 * Called from: 736 * thread_exit() 737 */ 738 void 739 thread_unlink(struct thread *td) 740 { 741 struct proc *p = td->td_proc; 742 struct ksegrp *kg = td->td_ksegrp; 743 744 mtx_assert(&sched_lock, MA_OWNED); 745 TAILQ_REMOVE(&p->p_threads, td, td_plist); 746 p->p_numthreads--; 747 TAILQ_REMOVE(&kg->kg_threads, td, td_kglist); 748 kg->kg_numthreads--; 749 /* could clear a few other things here */ 750 /* Must NOT clear links to proc and ksegrp! */ 751 } 752 753 /* 754 * Enforce single-threading. 755 * 756 * Returns 1 if the caller must abort (another thread is waiting to 757 * exit the process or similar). Process is locked! 758 * Returns 0 when you are successfully the only thread running. 759 * A process has successfully single threaded in the suspend mode when 760 * There are no threads in user mode. Threads in the kernel must be 761 * allowed to continue until they get to the user boundary. They may even 762 * copy out their return values and data before suspending. They may however be 763 * accellerated in reaching the user boundary as we will wake up 764 * any sleeping threads that are interruptable. (PCATCH). 765 */ 766 int 767 thread_single(int mode) 768 { 769 struct thread *td; 770 struct thread *td2; 771 struct proc *p; 772 int remaining; 773 774 td = curthread; 775 p = td->td_proc; 776 mtx_assert(&Giant, MA_NOTOWNED); 777 PROC_LOCK_ASSERT(p, MA_OWNED); 778 KASSERT((td != NULL), ("curthread is NULL")); 779 780 if ((p->p_flag & P_HADTHREADS) == 0) 781 return (0); 782 783 /* Is someone already single threading? */ 784 if (p->p_singlethread != NULL && p->p_singlethread != td) 785 return (1); 786 787 if (mode == SINGLE_EXIT) { 788 p->p_flag |= P_SINGLE_EXIT; 789 p->p_flag &= ~P_SINGLE_BOUNDARY; 790 } else { 791 p->p_flag &= ~P_SINGLE_EXIT; 792 if (mode == SINGLE_BOUNDARY) 793 p->p_flag |= P_SINGLE_BOUNDARY; 794 else 795 p->p_flag &= ~P_SINGLE_BOUNDARY; 796 } 797 p->p_flag |= P_STOPPED_SINGLE; 798 mtx_lock_spin(&sched_lock); 799 p->p_singlethread = td; 800 if (mode == SINGLE_EXIT) 801 remaining = p->p_numthreads; 802 else if (mode == SINGLE_BOUNDARY) 803 remaining = p->p_numthreads - p->p_boundary_count; 804 else 805 remaining = p->p_numthreads - p->p_suspcount; 806 while (remaining != 1) { 807 FOREACH_THREAD_IN_PROC(p, td2) { 808 if (td2 == td) 809 continue; 810 td2->td_flags |= TDF_ASTPENDING; 811 if (TD_IS_INHIBITED(td2)) { 812 switch (mode) { 813 case SINGLE_EXIT: 814 if (td->td_flags & TDF_DBSUSPEND) 815 td->td_flags &= ~TDF_DBSUSPEND; 816 if (TD_IS_SUSPENDED(td2)) 817 thread_unsuspend_one(td2); 818 if (TD_ON_SLEEPQ(td2) && 819 (td2->td_flags & TDF_SINTR)) 820 sleepq_abort(td2); 821 break; 822 case SINGLE_BOUNDARY: 823 if (TD_IS_SUSPENDED(td2) && 824 !(td2->td_flags & TDF_BOUNDARY)) 825 thread_unsuspend_one(td2); 826 if (TD_ON_SLEEPQ(td2) && 827 (td2->td_flags & TDF_SINTR)) 828 sleepq_abort(td2); 829 break; 830 default: 831 if (TD_IS_SUSPENDED(td2)) 832 continue; 833 /* 834 * maybe other inhibitted states too? 835 * XXXKSE Is it totally safe to 836 * suspend a non-interruptable thread? 837 */ 838 if (td2->td_inhibitors & 839 (TDI_SLEEPING | TDI_SWAPPED)) 840 thread_suspend_one(td2); 841 break; 842 } 843 } 844 } 845 if (mode == SINGLE_EXIT) 846 remaining = p->p_numthreads; 847 else if (mode == SINGLE_BOUNDARY) 848 remaining = p->p_numthreads - p->p_boundary_count; 849 else 850 remaining = p->p_numthreads - p->p_suspcount; 851 852 /* 853 * Maybe we suspended some threads.. was it enough? 854 */ 855 if (remaining == 1) 856 break; 857 858 /* 859 * Wake us up when everyone else has suspended. 860 * In the mean time we suspend as well. 861 */ 862 thread_suspend_one(td); 863 PROC_UNLOCK(p); 864 mi_switch(SW_VOL, NULL); 865 mtx_unlock_spin(&sched_lock); 866 PROC_LOCK(p); 867 mtx_lock_spin(&sched_lock); 868 if (mode == SINGLE_EXIT) 869 remaining = p->p_numthreads; 870 else if (mode == SINGLE_BOUNDARY) 871 remaining = p->p_numthreads - p->p_boundary_count; 872 else 873 remaining = p->p_numthreads - p->p_suspcount; 874 } 875 if (mode == SINGLE_EXIT) { 876 /* 877 * We have gotten rid of all the other threads and we 878 * are about to either exit or exec. In either case, 879 * we try our utmost to revert to being a non-threaded 880 * process. 881 */ 882 p->p_singlethread = NULL; 883 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT); 884 thread_unthread(td); 885 } 886 mtx_unlock_spin(&sched_lock); 887 return (0); 888 } 889 890 /* 891 * Called in from locations that can safely check to see 892 * whether we have to suspend or at least throttle for a 893 * single-thread event (e.g. fork). 894 * 895 * Such locations include userret(). 896 * If the "return_instead" argument is non zero, the thread must be able to 897 * accept 0 (caller may continue), or 1 (caller must abort) as a result. 898 * 899 * The 'return_instead' argument tells the function if it may do a 900 * thread_exit() or suspend, or whether the caller must abort and back 901 * out instead. 902 * 903 * If the thread that set the single_threading request has set the 904 * P_SINGLE_EXIT bit in the process flags then this call will never return 905 * if 'return_instead' is false, but will exit. 906 * 907 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0 908 *---------------+--------------------+--------------------- 909 * 0 | returns 0 | returns 0 or 1 910 * | when ST ends | immediatly 911 *---------------+--------------------+--------------------- 912 * 1 | thread exits | returns 1 913 * | | immediatly 914 * 0 = thread_exit() or suspension ok, 915 * other = return error instead of stopping the thread. 916 * 917 * While a full suspension is under effect, even a single threading 918 * thread would be suspended if it made this call (but it shouldn't). 919 * This call should only be made from places where 920 * thread_exit() would be safe as that may be the outcome unless 921 * return_instead is set. 922 */ 923 int 924 thread_suspend_check(int return_instead) 925 { 926 struct thread *td; 927 struct proc *p; 928 929 td = curthread; 930 p = td->td_proc; 931 mtx_assert(&Giant, MA_NOTOWNED); 932 PROC_LOCK_ASSERT(p, MA_OWNED); 933 while (P_SHOULDSTOP(p) || 934 ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) { 935 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 936 KASSERT(p->p_singlethread != NULL, 937 ("singlethread not set")); 938 /* 939 * The only suspension in action is a 940 * single-threading. Single threader need not stop. 941 * XXX Should be safe to access unlocked 942 * as it can only be set to be true by us. 943 */ 944 if (p->p_singlethread == td) 945 return (0); /* Exempt from stopping. */ 946 } 947 if ((p->p_flag & P_SINGLE_EXIT) && return_instead) 948 return (1); 949 950 /* Should we goto user boundary if we didn't come from there? */ 951 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE && 952 (p->p_flag & P_SINGLE_BOUNDARY) && return_instead) 953 return (1); 954 955 mtx_lock_spin(&sched_lock); 956 thread_stopped(p); 957 /* 958 * If the process is waiting for us to exit, 959 * this thread should just suicide. 960 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. 961 */ 962 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) 963 thread_exit(); 964 965 /* 966 * When a thread suspends, it just 967 * moves to the processes's suspend queue 968 * and stays there. 969 */ 970 thread_suspend_one(td); 971 if (return_instead == 0) { 972 p->p_boundary_count++; 973 td->td_flags |= TDF_BOUNDARY; 974 } 975 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { 976 if (p->p_numthreads == p->p_suspcount) 977 thread_unsuspend_one(p->p_singlethread); 978 } 979 PROC_UNLOCK(p); 980 mi_switch(SW_INVOL, NULL); 981 if (return_instead == 0) { 982 p->p_boundary_count--; 983 td->td_flags &= ~TDF_BOUNDARY; 984 } 985 mtx_unlock_spin(&sched_lock); 986 PROC_LOCK(p); 987 } 988 return (0); 989 } 990 991 void 992 thread_suspend_one(struct thread *td) 993 { 994 struct proc *p = td->td_proc; 995 996 mtx_assert(&sched_lock, MA_OWNED); 997 PROC_LOCK_ASSERT(p, MA_OWNED); 998 KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); 999 p->p_suspcount++; 1000 TD_SET_SUSPENDED(td); 1001 TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq); 1002 /* 1003 * Hack: If we are suspending but are on the sleep queue 1004 * then we are in msleep or the cv equivalent. We 1005 * want to look like we have two Inhibitors. 1006 * May already be set.. doesn't matter. 1007 */ 1008 if (TD_ON_SLEEPQ(td)) 1009 TD_SET_SLEEPING(td); 1010 } 1011 1012 void 1013 thread_unsuspend_one(struct thread *td) 1014 { 1015 struct proc *p = td->td_proc; 1016 1017 mtx_assert(&sched_lock, MA_OWNED); 1018 PROC_LOCK_ASSERT(p, MA_OWNED); 1019 TAILQ_REMOVE(&p->p_suspended, td, td_runq); 1020 TD_CLR_SUSPENDED(td); 1021 p->p_suspcount--; 1022 setrunnable(td); 1023 } 1024 1025 /* 1026 * Allow all threads blocked by single threading to continue running. 1027 */ 1028 void 1029 thread_unsuspend(struct proc *p) 1030 { 1031 struct thread *td; 1032 1033 mtx_assert(&sched_lock, MA_OWNED); 1034 PROC_LOCK_ASSERT(p, MA_OWNED); 1035 if (!P_SHOULDSTOP(p)) { 1036 while ((td = TAILQ_FIRST(&p->p_suspended))) { 1037 thread_unsuspend_one(td); 1038 } 1039 } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) && 1040 (p->p_numthreads == p->p_suspcount)) { 1041 /* 1042 * Stopping everything also did the job for the single 1043 * threading request. Now we've downgraded to single-threaded, 1044 * let it continue. 1045 */ 1046 thread_unsuspend_one(p->p_singlethread); 1047 } 1048 } 1049 1050 /* 1051 * End the single threading mode.. 1052 */ 1053 void 1054 thread_single_end(void) 1055 { 1056 struct thread *td; 1057 struct proc *p; 1058 1059 td = curthread; 1060 p = td->td_proc; 1061 PROC_LOCK_ASSERT(p, MA_OWNED); 1062 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY); 1063 mtx_lock_spin(&sched_lock); 1064 p->p_singlethread = NULL; 1065 /* 1066 * If there are other threads they mey now run, 1067 * unless of course there is a blanket 'stop order' 1068 * on the process. The single threader must be allowed 1069 * to continue however as this is a bad place to stop. 1070 */ 1071 if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) { 1072 while ((td = TAILQ_FIRST(&p->p_suspended))) { 1073 thread_unsuspend_one(td); 1074 } 1075 } 1076 mtx_unlock_spin(&sched_lock); 1077 } 1078 1079 /* 1080 * Called before going into an interruptible sleep to see if we have been 1081 * interrupted or requested to exit. 1082 */ 1083 int 1084 thread_sleep_check(struct thread *td) 1085 { 1086 struct proc *p; 1087 1088 p = td->td_proc; 1089 mtx_assert(&sched_lock, MA_OWNED); 1090 if (p->p_flag & P_HADTHREADS) { 1091 if (p->p_singlethread != td) { 1092 if (p->p_flag & P_SINGLE_EXIT) 1093 return (EINTR); 1094 if (p->p_flag & P_SINGLE_BOUNDARY) 1095 return (ERESTART); 1096 } 1097 if (td->td_flags & TDF_INTERRUPT) 1098 return (td->td_intrval); 1099 } 1100 return (0); 1101 } 1102