/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

int virtual_cpu;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);
void kse_purge_group(struct thread *td);

/* move to proc.h */
extern void kseinit(void);
extern void kse_GC(void);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts. The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	lwpid_t		bmp_base;
	int		bmp_free;
};

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);
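
/*
 * Example of the ID layout: with 64-bit longs, TID_IDS_PER_IDX is 64,
 * so each part covers TID_IDS_PER_PART (1024) IDs in TID_BITMAP_SIZE
 * (16) bitmap words, and a thread ID decomposes as
 *
 *	td_tid = bmp_base + idx * TID_IDS_PER_IDX + bit
 *
 * where 'idx' selects the bitmap word and 'bit' the position within it.
 * Bits are 1 when free and 0 when assigned, so ffsl() can be used to
 * find a free ID (see thread_init() and thread_fini() below).
 */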

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch. A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time. This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;
	struct tid_bitmap_part *bmp, *new;
	int bit, idx;

	td = (struct thread *)mem;

	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART / 2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);
	if (new != NULL)
		uma_zfree(tid_zone, new);

	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	lwpid_t tid;
	int bit, idx;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);

	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (td->td_tid >= bmp->bmp_base &&
		    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
			break;
	}
	KASSERT(bmp != NULL, ("No TID bitmap?"));
	mtx_lock(&tid_lock);
	tid = td->td_tid - bmp->bmp_base;
	idx = tid / TID_IDS_PER_IDX;
	bit = 1UL << (tid % TID_IDS_PER_IDX);
	bmp->bmp_bitmap[idx] |= bit;
	bmp->bmp_free++;
	mtx_unlock(&tid_lock);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static int
kse_init(void *mem, int size, int flags)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
	return (0);
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static int
ksegrp_init(void *mem, int size, int flags)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
	return (0);
}

/*
 * Link a KSE into its KSE group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
	ke->ke_flags = 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	kg->kg_idle_kses = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the ksegrp
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}
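
/*
 * A note on the UMA callbacks wired up in threadinit() below: the
 * ctor/dtor pair (thread_ctor()/thread_dtor()) runs on every
 * uma_zalloc()/uma_zfree(), while the init/fini pair (thread_init()/
 * thread_fini(), kse_init(), ksegrp_init()) runs only when an item
 * moves between the zone and its backing store.  Expensive, type-stable
 * state such as the kernel stack, the thread ID, the sleep queue and
 * the turnstile is therefore managed in init/fini rather than ctor/dtor.
 */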

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kseinit();
}
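
/*
 * Zombie handling: a thread cannot free its own structures while it is
 * still running on them, so thread_exit() parks the exiting thread in
 * the per-CPU deadthread slot and the *_stash() routines below queue
 * spare threads, KSEs and ksegrps on zombie lists protected by
 * kse_zombie_lock.  The lists are drained later, from a safe context,
 * by thread_reap(), which is called from thread_alloc() and
 * thread_wait().
 */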

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
	kse_GC();
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw(). This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1 does.
 */
/* XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode I guess. Because we do this (cpu_throw) unconditionally
 * here, they have their own version of it. (thr_exit1())
 * that doesn't do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1
 * anyhow..  After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	mtx_assert(&Giant, MA_NOTOWNED);

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and the owner thread exits only when the process is
		 * already exiting, an upcall to userland is no longer
		 * needed and it is safe to delete the upcall structure
		 * here. So when all threads in a group have exited, all
		 * upcalls in the group should be automatically freed.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), td);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
				ksegrp_unlink(kg);
			}
		} else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse = NULL;
#if 0
		td->td_proc = NULL;
#endif
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	td->td_state = TDS_INACTIVE;
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;
	td->td_flags = 0;
	td->td_kflags = 0;
	td->td_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp's resources. When a ksegrp is preparing to
 * exit, it calls this function.
 */
void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
		    ("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
	    ("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	    ("%s: ksegrp still has %d upcall datas",
	    __func__, kg->kg_numupcalls));
}

/*
 * Purge a process's KSE resources. When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * KSEs have no owner; after all threads in the group
		 * have exited, some KSEs may have been left on the idle
		 * queue.  GC them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			    ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		    ("%s: ksegrp still has %d upcall datas",
		    __func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single-threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	if (force_exit == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (td->td_flags & TDF_DBSUSPEND)
						td->td_flags &= ~TDF_DBSUSPEND;
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						sleepq_abort(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall)
			upcall_remove(td);
		kse_purge(p, td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single-threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			if (p->p_flag & P_SA)
				thread_exit();
			else
				thr_exit1();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}