/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>
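
/*
 * Overview (added annotation, not part of the original file): this file
 * manages the objects behind KSE threading.  A process (struct proc) owns
 * one or more KSE groups (struct ksegrp); each group owns KSEs (struct kse,
 * roughly "a slot that may run on a CPU") and threads (struct thread).
 * The routines below allocate, link, recycle and single-thread those
 * objects, and shuttle thread state between the kernel and the userland
 * thread scheduler (UTS) through per-KSE and per-thread mailboxes.
 */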

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int oiks_debug = 0;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

static int oiks_max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, oiks_max_per_proc, CTLFLAG_RW,
	&oiks_max_threads_per_proc, 0, "Debug limit on threads per proc");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	mtx_assert(&Giant, MA_OWNED);
	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif

	cpu_thread_dtor(td);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * KSE is linked onto the idle queue.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	if (ke->ke_state == KES_IDLE) {
		kg->kg_idle_kses--;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
	}

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0) {
		ksegrp_unlink(kg);
	}
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_idle_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("kseg_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}
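
/*
 * The next group of functions implements the kse_* family of system calls
 * (kse_thr_interrupt(), kse_exit(), kse_release(), kse_wakeup() and
 * kse_create()).  The sketch below is an added illustration, not part of
 * the original file, of how a userland thread scheduler (UTS) might enter
 * KSE mode; the userland identifiers (uts_entry, uts_stack, UTS_STACK_SIZE)
 * are assumptions used only for the example:
 *
 *	struct kse_mailbox km;
 *
 *	bzero(&km, sizeof(km));
 *	km.km_func = uts_entry;			// upcall entry point in the UTS
 *	km.km_stack.ss_sp = uts_stack;		// stack the upcalls will run on
 *	km.km_stack.ss_size = UTS_STACK_SIZE;
 *	kse_create(&km, 0);			// use the current KSE, no new group
 *
 * After this call the kernel reports completed threads and other events
 * to the UTS through the mailbox (via upcalls) rather than simply
 * returning to the interrupted user context.
 */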

int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (uap->tmbx == NULL)
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			td->td_retval[0] = 0;
			td->td_retval[1] = 0;
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/* Must be a bound thread. */
	if (td->td_flags & TDF_UNBOUND)
		return (EINVAL);
	kg = td->td_ksegrp;
	/* Serialize killing the kse. */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	if ((p->p_numthreads == 1) && (p->p_numksegrps == 1)) {
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		while (mtx_owned(&Giant))
			mtx_unlock(&Giant);
		td->td_kse->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;

	p = td->td_proc;
	/* KSE-enabled processes only */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/*
	 * Must be a bound thread, and the kse must have a mailbox ready;
	 * otherwise the kse cannot generate an upcall.
	 */
	if (!(td->td_flags & TDF_UNBOUND) && (td->td_kse->ke_mailbox != NULL)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		/* Prevent the last thread from exiting. */
		if (p->p_numthreads == 1) {
			mtx_unlock_spin(&sched_lock);
			if (td->td_standin == NULL) {
				PROC_UNLOCK(p);
				td->td_standin = thread_alloc();
				PROC_LOCK(p);
			}
			msleep(p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH,
			    "pause", 0);
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_UNBOUND;
			thread_schedule_upcall(td, td->td_kse);
		}
		thread_exit();
		/* NOTREACHED */
	}
	return (EINVAL);
}
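
/*
 * Added summary comment: kse_wakeup() finds the idle KSE that owns the
 * given mailbox (or, if mbx is NULL, any idle KSE in the caller's group)
 * and schedules an upcall on it so the UTS regains control.  If the KSE
 * is found but is not idle, the call simply succeeds without doing
 * anything.
 */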

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke, *ke2;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	ke = NULL;
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke2) {
				if (ke2->ke_mailbox != uap->mbx)
					continue;
				if (ke2->ke_state == KES_IDLE) {
					ke = ke2;
					goto found;
				} else {
					mtx_unlock_spin(&sched_lock);
					td->td_retval[0] = 0;
					td->td_retval[1] = 0;
					return (0);
				}
			}
		}
	} else {
		kg = td->td_ksegrp;
		ke = TAILQ_FIRST(&kg->kg_iq);
	}
	if (ke == NULL) {
		mtx_unlock_spin(&sched_lock);
		return (ESRCH);
	}
found:
	thread_schedule_upcall(td, ke);
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

/*
 * No new KSEGRP: first call: use the current KSE, don't schedule an upcall.
 * In all other situations, allocate a new KSE and schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES;	/* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way. It will
		 * ensure that this thread stays BOUND. It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		    kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		    RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if we are not already at the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
			if (oiks_debug == 0) {
#ifdef SMP
				if (kg->kg_kses > mp_ncpus)
#endif
					return (EPROCLIM);
			}
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		    ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		    RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set. */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg) {
			if (p->p_numksegrps >= max_groups_per_proc) {
				mtx_unlock_spin(&sched_lock);
				ksegrp_free(newkg);
				kse_free(newke);
				return (EPROCLIM);
			}
			ksegrp_link(newkg, p);
		} else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new kse.
	 */

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works.  Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;
	uint temp;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error == 0) {
		thread_getcontext(td, &uc);
		error = copyout(&uc, addr, sizeof(ucontext_t));
	}
	if (error) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (error);
	}
	/* Get the address of the list pointer in the latest mailbox. */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp))
		goto bad;
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (EFAULT);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_add_ticks_intr(int user, uint ticks)
{
	struct thread *td = curthread;
	struct kse *ke = td->td_kse;

	if (ke->ke_mailbox == NULL)
		return (-1);
	if (user) {
		/* Currently always done via ast(). */
		ke->ke_flags |= KEF_ASTPENDING;
		ke->ke_uuticks += ticks;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks += ticks;
		else
			ke->ke_usticks += ticks;
	}
	return (0);
}

static int
thread_update_uticks(void)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kse *ke = td->td_kse;
	struct kse_thr_mailbox *tmbx;
	caddr_t addr;
	uint uticks, sticks;

	KASSERT(!(td->td_flags & TDF_UNBOUND), ("thread not bound."));

	if (ke->ke_mailbox == NULL)
		return (0);

	uticks = ke->ke_uuticks;
	ke->ke_uuticks = 0;
	sticks = ke->ke_usticks;
	ke->ke_usticks = 0;
	tmbx = (void *)fuword((caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_curthread));
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (0);
	if (uticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_uticks);
		uticks += fuword(addr);
		if (suword(addr, uticks))
			goto bad;
	}
	if (sticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_sticks);
		sticks += fuword(addr);
		if (suword(addr, sticks))
			goto bad;
	}
	return (0);
bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (-1);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		KASSERT((ke->ke_bound != td),
		    ("thread_exit: entered with ke_bound set"));

		/*
		 * The reason for all this hoopla is
		 * an attempt to stop our thread stack from being freed
		 * until AFTER we have stopped running on it.
		 * Since we are under schedlock, almost any method where
		 * it is eventually freed by someone else is probably ok.
		 * (Especially if they do it under schedlock). We could
		 * almost free it here if we could be certain that
		 * the uma code wouldn't pull it apart immediately,
		 * but unfortunately we can not guarantee that.
		 *
		 * For threads that are exiting and NOT killing their
		 * KSEs we can just stash it in the KSE, however
		 * in the case where the KSE is also being deallocated,
		 * we need to store it somewhere else. It turns out that
		 * we will never free the last KSE, so there is always one
		 * other KSE available. We might as well just choose one
		 * and stash it there. Being under schedlock should make that
		 * safe.
		 *
		 * In borrower threads, we can stash it in the lender,
		 * where it won't be needed until this thread is long gone.
		 * Borrower threads can't kill their KSE anyhow, so even
		 * the KSE would be a safe place for them. It is not
		 * necessary to have a KSE (or KSEGRP) at all beyond this
		 * point, while we are under the protection of schedlock.
		 *
		 * Either give the KSE to another thread to use (or make
		 * it idle), or free it entirely, possibly along with its
		 * ksegrp if it's the last one.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			/*
			 * Designate another KSE to hold our thread.
			 * Safe as long as we abide by whatever lock
			 * we control it with.. The other KSE will not
			 * be able to run it until we release the schedlock,
			 * but we need to be careful about it deciding to
			 * write to the stack before then. Luckily
			 * I believe that while another thread's
			 * standin thread can be used in this way, the
			 * spare thread for the KSE cannot be used without
			 * holding schedlock at least once.
			 */
			ke = FIRST_KSE_IN_PROC(p);
		} else {
			kse_reassign(ke);
		}
#if 0
		if (ke->ke_bound) {
			/*
			 * WE are a borrower..
			 * stash our thread with the owner.
			 */
			if (ke->ke_bound->td_standin) {
				thread_stash(ke->ke_bound->td_standin);
			}
			ke->ke_bound->td_standin = td;
		} else {
#endif
			if (ke->ke_tdspare != NULL) {
				thread_stash(ke->ke_tdspare);
				ke->ke_tdspare = NULL;
			}
			ke->ke_tdspare = td;
#if 0
		}
#endif
		PROC_UNLOCK(p);
		td->td_state = TDS_INACTIVE;
		td->td_proc = NULL;
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
	} else {
		PROC_UNLOCK(p);
	}

	cpu_throw();
	/* NOTREACHED */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	if (oiks_debug && (p->p_numthreads > oiks_max_threads_per_proc)) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_kse = NULL;
}

void
kse_purge(struct proc *p, struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			if (ke->ke_tdspare)
				thread_stash(ke->ke_tdspare);
			kse_stash(ke);
		}
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}


/*
 * Create a thread and schedule it for upcall on the KSE given.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	struct ksegrp *kg;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the kse is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some in-kernel
	 * work. When we complete, the bound thread will have the chance to
	 * complete. This thread will sleep as planned. Hopefully there will
	 * eventually be an unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */
	if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
		return (NULL);
	}
	KASSERT((ke->ke_bound == NULL), ("kse already bound"));

	if (ke->ke_state == KES_IDLE) {
		kg = ke->ke_ksegrp;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
		ke->ke_state = KES_UNQUEUED;
	}
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING;	/* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/*
	 * If called from msleep(), we are working on the current
	 * KSE so fake that we borrowed it. If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace, (which ever
		 * happens first). When that happens the KSE will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		ke->ke_bound = td2;
		TD_SET_LOAN(td2);
	} else {
		ke->ke_bound = NULL;
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that it has received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);	/* XXX: this makes the rest of the function unreachable. */

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in sleep()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_UNBOUND;
			mtx_unlock_spin(&sched_lock);
		} else {
			/*
			 * When the thread limit is reached, act as though
			 * the thread has already done an upcall.
			 */
			if (p->p_numthreads > max_threads_per_proc) {
				if (td->td_standin != NULL)
					thread_stash(td->td_standin);
				td->td_standin = NULL;
			} else {
				if (td->td_standin == NULL)
					td->td_standin = thread_alloc();
			}
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_UNBOUND;
			mtx_unlock_spin(&sched_lock);
			td->td_usticks = 0;
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first). If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct proc *p;
	struct timespec ts;

	error = 0;

	unbound = td->td_flags & TDF_UNBOUND;

	kg = td->td_ksegrp;
	p = td->td_proc;

	/*
	 * Originally bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some philanthropy.
	 * Unbound threads on the other hand either yield to other work
	 * or transform into an upcall.
	 * (having saved their context to user space in both cases)
	 */
	if (unbound) {
		/*
		 * We are an unbound thread, looking to return to
		 * user space.
		 * There are several possibilities:
		 * 1) we are using a borrowed KSE. save state and exit.
		 *    kse_reassign() will recycle the kse as needed,
		 * 2) we are not.. save state, and then convert ourself
		 *    to be an upcall, bound to the KSE.
		 *    if there are others that need the kse,
		 *    give them a chance by doing an mi_switch().
		 *    Because we are bound, control will eventually return
		 *    to us here.
		 * ***
		 * Save the thread's context, and link it
		 * into the KSEGRP's list of completed threads.
		 */
		error = thread_export_context(td);
		td->td_mailbox = NULL;
		td->td_usticks = 0;
		if (error) {
			/*
			 * If we are not running on a borrowed KSE, then
			 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
			 * the syscall. If it IS borrowed, there is nothing
			 * we can do. We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			if (td->td_kse->ke_bound == NULL) {
				td->td_flags &= ~TDF_UNBOUND;
				PROC_UNLOCK(td->td_proc);
				mtx_unlock_spin(&sched_lock);
				thread_update_uticks();
				return (error);	/* go sync */
			}
			thread_exit();
		}

		/*
		 * If the KSE is owned and we are borrowing it,
		 * don't make an upcall, just exit so that the owner
		 * can get its KSE if it wants it.
		 * Our context is already safely stored for later
		 * use by the UTS.
		 */
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (td->td_kse->ke_bound) {
			thread_exit();
		}
		PROC_UNLOCK(p);

		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 * We should look just like a scheduled upcall
		 * from msleep() or cv_wait().
		 */
		td->td_flags &= ~TDF_UNBOUND;
		td->td_flags |= TDF_UPCALLING;
		/* Only get here if we have become an upcall */

	} else {
		mtx_lock_spin(&sched_lock);
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * Check for threads that need to borrow it.
	 * Optimisation: don't call mi_switch if no-one wants the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;
	if ((td2 = kg->kg_last_assigned))
		td2 = TAILQ_NEXT(td2, td_runq);
	else
		td2 = TAILQ_FIRST(&kg->kg_runq);
	if (td2) {
		/*
		 * Force a switch to more urgent 'in kernel'
		 * work. Control will return to this thread
		 * when there is no more work to do.
		 * kse_reassign() will do that for us.
		 */
		TD_SET_LOAN(td);
		ke->ke_bound = td;
		ke->ke_thread = NULL;
		mi_switch();	/* kse_reassign() will (re)find td2 */
	}
	mtx_unlock_spin(&sched_lock);

	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL) {
		if (ke->ke_tdspare) {
			td->td_standin = ke->ke_tdspare;
			ke->ke_tdspare = NULL;
		} else {
			td->td_standin = thread_alloc();
		}
	}

	thread_update_uticks();
	/*
	 * To get here, we know there is no other need for our
	 * KSE so we can proceed. If not upcalling, go back to
	 * userspace. If we are, get the upcall set up.
	 */
	if ((td->td_flags & TDF_UPCALLING) == 0)
		return (0);

	/*
	 * We must be an upcall to get this far.
	 * There is no more work to do and we are going to ride
	 * this thread/KSE up to userland as an upcall.
	 * Do the last parts of the setup needed for the upcall.
	 */
	CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);

	/*
	 * Set user context to the UTS.
	 */
	cpu_set_upcall_kse(td, ke);

	/*
	 * Put any completed mailboxes on this KSE's list.
	 */
	error = thread_link_mboxes(kg, ke);
	if (error)
		goto bad;

	/*
	 * Set state and mailbox.
	 * From now on we are just a bound outgoing process.
	 * **Problem** userret is often called several times.
	 * It would be nice if this all happened only on the first time
	 * through. (the scan for extra work etc.)
	 */
	mtx_lock_spin(&sched_lock);
	td->td_flags &= ~TDF_UPCALLING;
	mtx_unlock_spin(&sched_lock);
#if 0
	error = suword((caddr_t)ke->ke_mailbox +
	    offsetof(struct kse_mailbox, km_curthread), 0);
#else	/* if user pointer arithmetic is ok in the kernel */
	error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
#endif
	ke->ke_uuticks = ke->ke_usticks = 0;
	if (!error) {
		nanotime(&ts);
		if (copyout(&ts, (caddr_t)&ke->ke_mailbox->km_timeofday,
		    sizeof(ts))) {
			goto bad;
		}
	}
	return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * how do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however
 * be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT)
		p->p_flag |= P_SINGLE_EXIT;
	else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* XXXKSE Which lock protects the below values? */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						if (td2->td_flags & TDF_CVWAITQ)
							cv_abort(td2);
						else
							abortsleep(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/* maybe other inhibited states too? */
					if (TD_IS_SLEEPING(td2))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1) {
			mtx_unlock_spin(&sched_lock);
			break;
		}

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	if (force_exit == SINGLE_EXIT)
		kse_purge(p, td);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			/*
			 * Free extra kses and ksegrps.  We needn't worry
			 * about whether the current thread is in the same
			 * ksegrp as p_singlethread and the last kse in the
			 * group could be killed; that is protected by
			 * kg_numthreads (in that case kg_numthreads must
			 * be > 1).
			 */
			ke = td->td_kse;
			if (ke->ke_bound == NULL &&
			    ((kg->kg_kses != 1) || (kg->kg_numthreads == 1)))
				ke->ke_flags |= KEF_EXIT;
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the processes's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release it's KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
		mtx_unlock_spin(&sched_lock);
	}
}
