/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

int virtual_cpu;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);
void kse_purge_group(struct thread *td);

/* move to proc.h */
extern void kseinit(void);
extern void kse_GC(void);


static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts. The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	lwpid_t		bmp_base;
	int		bmp_free;
};

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);
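
/*
 * The thread zone below relies on UMA's two-level life cycle: the ctor/dtor
 * pair runs on every thread_alloc()/thread_free(), while init/fini run only
 * when an item first enters or finally leaves the zone's backing store.
 * Type-stable state set up in thread_init() (the TID, kernel stack, sleep
 * queue and turnstile) therefore survives being recycled through the zone
 * and is only torn down in thread_fini().
 */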

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch. A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time. This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;
	struct tid_bitmap_part *bmp, *new;
	int bit, idx;

	td = (struct thread *)mem;

	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);
	if (new != NULL)
		uma_zfree(tid_zone, new);

	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
}
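
/*
 * A worked example of the TID bitmap math used by thread_init() above and
 * thread_fini() below (assuming a 64-bit u_long and the stock PID_MAX of
 * 99999): the first part covers tids TID_MIN..TID_MIN + 1023, i.e.
 * 100000..101023, with TID_IDS_PER_IDX == 64 and TID_BITMAP_SIZE == 16.
 * A tid is built from (part, idx, bit) as
 *	tid = bmp_base + idx * TID_IDS_PER_IDX + bit
 * and decomposed again on free as
 *	idx = (tid - bmp_base) / TID_IDS_PER_IDX
 *	bit = (tid - bmp_base) % TID_IDS_PER_IDX
 * so, e.g., tid 100130 in the first part sits at idx 2, bit 2.
 */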

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	lwpid_t tid;
	int bit, idx;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);

	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (td->td_tid >= bmp->bmp_base &&
		    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
			break;
	}
	KASSERT(bmp != NULL, ("No TID bitmap?"));
	mtx_lock(&tid_lock);
	tid = td->td_tid - bmp->bmp_base;
	idx = tid / TID_IDS_PER_IDX;
	bit = 1UL << (tid % TID_IDS_PER_IDX);
	bmp->bmp_bitmap[idx] |= bit;
	bmp->bmp_free++;
	mtx_unlock(&tid_lock);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its kse group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
	ke->ke_flags = 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE.
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	kg->kg_idle_kses = 0;
	kg->kg_numupcalls = 0;
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the ksegrp.
	 */
	ksegrp_stash(kg);
}
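
/*
 * A rough picture of the containment the link/unlink routines maintain:
 * a proc owns a list of ksegrps (p_ksegrps/p_numksegrps) and a flat list
 * of all its threads (p_threads/p_numthreads); each ksegrp in turn owns
 * its threads (kg_threads/kg_numthreads) and its KSEs (kg_kseq/kg_kses),
 * with idle KSEs also hanging off kg_iq. A thread is always on both its
 * proc's and its ksegrp's list, which is why thread_link() and
 * thread_unlink() below touch two lists and two counters.
 */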

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kseinit();
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant,
	 * we really don't care about the next instant..
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
	kse_GC();
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}
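
/*
 * Freeing is deferred here because an exiting thread cannot release its own
 * stack while still running on it: the exit path parks resources on the
 * zombie queues above (or in the per-CPU deadthread slot) and they are
 * reclaimed later from a safe context by thread_reap(). thread_alloc()
 * below calls thread_reap() first, making allocation one such opportunistic
 * collection point.
 */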

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw(). This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1 does.
 */
/* XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode I guess. Because we do this (cpu_throw) unconditionally
 * here, they have their own version of it (thr_exit1()) that
 * doesn't do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1
 * anyhow.. After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	mtx_assert(&Giant, MA_NOTOWNED);

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread, and
		 * an owner thread exits only when the process is exiting,
		 * no upcall to userland is needed any longer and it is
		 * safe to delete the upcall structure here. Thus, once
		 * all threads in a group have exited, all upcalls in the
		 * group should have been freed automatically.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), td);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
				ksegrp_unlink(kg);
			}
		} else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse = NULL;
#if 0
		td->td_proc = NULL;
#endif
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	td->td_state = TDS_INACTIVE;
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}
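
/*
 * Note that the thread parked in the per-CPU deadthread slot above is not
 * freed on this path; it is left for a later context, outside this file,
 * to stash and reap once this stack is no longer in use.
 */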

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;
	td->td_flags = 0;
	td->td_kflags = 0;
	td->td_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp's resources. When a ksegrp is preparing to
 * exit, it calls this function.
 */
void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
		    ("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
	    ("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	    ("%s: ksegrp still has %d upcall datas",
	    __func__, kg->kg_numupcalls));
}
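
/*
 * Two levels of KSE cleanup: kse_purge_group() above drops the idle KSEs
 * of a single ksegrp once only one of its threads remains, while
 * kse_purge() below walks every ksegrp of a process that is down to its
 * last thread, stashing idle KSEs and any ksegrp other than the surviving
 * thread's own.
 */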

/*
 * Purge a process's KSE resources. When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * KSEs have no owner, so after all threads in the
		 * group have exited it is possible that some KSEs
		 * were left on the idle queue; GC them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			    ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		    ("%s: ksegrp still has %d upcall datas",
		    __func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
}
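
/*
 * A quick sketch of the single-threading protocol implemented below:
 * thread_single() marks the process P_STOPPED_SINGLE, records the caller
 * in p_singlethread and then herds every other thread toward suspension,
 * sleeping itself until p_suspcount accounts for everyone. The other
 * threads notice via TDF_ASTPENDING and call thread_suspend_check() on
 * their way back to user mode, where they either suspend themselves or
 * exit (if P_SINGLE_EXIT was requested). thread_single_end() and
 * thread_unsuspend() undo the state and release the suspended threads.
 */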

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	if (force_exit == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (td->td_flags & TDF_DBSUSPEND)
						td->td_flags &= ~TDF_DBSUSPEND;
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						sleepq_abort(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall)
			upcall_remove(td);
		kse_purge(p, td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single-threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			if (p->p_flag & P_SA)
				thread_exit();
			else
				thr_exit1();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}
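
/*
 * The two helpers below keep the suspension bookkeeping in one place:
 * p_suspcount and the p_suspended queue always change together, and both
 * the scheduler lock and the proc lock are held across the change so that
 * the p_numthreads == p_suspcount tests above see a consistent pair.
 */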

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}