/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int oiks_debug = 0;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

static int oiks_max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, oiks_max_per_proc, CTLFLAG_RW,
	&oiks_max_threads_per_proc, 0, "Debug limit on threads per proc");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

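/*
 * A note on the zone hooks above: with UMA, the per-item init and fini
 * hooks (thread_init(), thread_fini(), kse_init(), ksegrp_init()) run
 * when an item is first constructed in, or finally released from, the
 * zone's backing store, so what they set up (the kernel stack via
 * pmap_new_thread() and the scheduler-private extension hung off the
 * end of each structure) is type-stable and survives ordinary
 * free/alloc cycles.  The ctor/dtor pair (thread_ctor(), thread_dtor())
 * runs on every thread_alloc()/thread_free() and only resets per-use
 * state.
 *
 * The linkage routines that follow maintain the containment hierarchy:
 * a proc owns its ksegrps, a ksegrp owns its kses and threads, and a
 * running thread is paired with one kse.
 */
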
/*
 * KSE is linked onto the idle queue.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	if (ke->ke_state == KES_IDLE) {
		kg->kg_idle_kses--;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
	}

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0) {
		ksegrp_unlink(kg);
	}
	/*
	 * Aggregate stats from the KSE.
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_idle_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("kseg_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSEGRP.
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (uap->tmbx == NULL)
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			td->td_retval[0] = 0;
			td->td_retval[1] = 0;
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/* must be a bound thread */
	if (td->td_flags & TDF_UNBOUND)
		return (EINVAL);
	kg = td->td_ksegrp;
	/* serialize killing kse */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	if ((p->p_numthreads == 1) && (p->p_numksegrps == 1)) {
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		while (mtx_owned(&Giant))
			mtx_unlock(&Giant);
		td->td_kse->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;

	p = td->td_proc;
	/* KSE-enabled processes only */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/*
	 * Must be a bound thread, and the kse must have a mailbox ready;
	 * if not, the kse cannot generate an upcall.
	 */
	if (!(td->td_flags & TDF_UNBOUND) && (td->td_kse->ke_mailbox != NULL)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		/* prevent last thread from exiting */
		if (p->p_numthreads == 1) {
			mtx_unlock_spin(&sched_lock);
			if (td->td_standin == NULL) {
				PROC_UNLOCK(p);
				td->td_standin = thread_alloc();
				PROC_LOCK(p);
			}
			msleep(p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH,
			    "pause", 0);
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_UNBOUND;
			thread_schedule_upcall(td, td->td_kse);
		}
		thread_exit();
		/* NOTREACHED */
	}
	return (EINVAL);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke, *ke2;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	ke = NULL;
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke2) {
				if (ke2->ke_mailbox != uap->mbx)
					continue;
				if (ke2->ke_state == KES_IDLE) {
					ke = ke2;
					goto found;
				} else {
					mtx_unlock_spin(&sched_lock);
					td->td_retval[0] = 0;
					td->td_retval[1] = 0;
					return (0);
				}
			}
		}
	} else {
		kg = td->td_ksegrp;
		ke = TAILQ_FIRST(&kg->kg_iq);
	}
	if (ke == NULL) {
		mtx_unlock_spin(&sched_lock);
		return (ESRCH);
	}
found:
	thread_schedule_upcall(td, ke);
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

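/*
 * An illustrative sketch of how these syscalls are driven from userland
 * by a UTS (the names uts_upcall, uts_stack and UTS_STACK_SIZE are made
 * up for the example; only the mailbox fields actually read by
 * kse_create() below are shown):
 *
 *	struct kse_mailbox mbx;
 *
 *	bzero(&mbx, sizeof(mbx));
 *	mbx.km_func = uts_upcall;		(upcall entry point)
 *	mbx.km_stack.ss_sp = uts_stack;		(stack upcalls run on)
 *	mbx.km_stack.ss_size = UTS_STACK_SIZE;
 *	kse_create(&mbx, 0);			(0: stay in this ksegrp)
 *
 * A library would later use kse_wakeup(&mbx) to kick an idle KSE, and a
 * bound thread would use kse_exit() to retire its KSE.
 */
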
/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall.
 * All other situations, do allocate a new KSE and schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES; /* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way. It will
		 * ensure that this thread stays BOUND. It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		    kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		    RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if there are not already the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
			if (oiks_debug == 0) {
#ifdef SMP
				if (kg->kg_kses > mp_ncpus)
#endif
					return (EPROCLIM);
			}
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		    ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		    RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg) {
			if (p->p_numksegrps >= max_groups_per_proc) {
				mtx_unlock_spin(&sched_lock);
				ksegrp_free(newkg);
				kse_free(newke);
				return (EPROCLIM);
			}
			ksegrp_link(newkg, p);
		} else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new kse.
	 */

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works.  Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{
	uma_zfree(thread_zone, td);
}

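/*
 * A note on the user-space list manipulation done by the next two
 * functions: the kernel pushes a completed thread's mailbox onto the
 * singly linked list anchored at kg_completed by writing the old list
 * head into the mailbox's tm_next with suword() and then re-checking,
 * under the proc lock, that kg_completed has not changed before
 * publishing the new head.  The retry loop is a poor man's
 * compare-and-swap against concurrent updaters (see the XXXKSE note
 * about CMPXCH in thread_link_mboxes() below); if any of the user-space
 * accesses fault, the process is sent SIGSEGV.
 */
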
/*
 * Store the thread context in the UTS's mailbox.
 * Then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;
	uint temp;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error == 0) {
		thread_getcontext(td, &uc);
		error = copyout(&uc, addr, sizeof(ucontext_t));
	}
	if (error) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (error);
	}
	/* get address in latest mbox of list pointer */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp))
		goto bad;
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (EFAULT);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_add_ticks_intr(int user, uint ticks)
{
	struct thread *td = curthread;
	struct kse *ke = td->td_kse;

	if (ke->ke_mailbox == NULL)
		return (-1);
	if (user) {
		/* Currently always done via ast(). */
		ke->ke_flags |= KEF_ASTPENDING;
		ke->ke_uuticks += ticks;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks += ticks;
		else
			ke->ke_usticks += ticks;
	}
	return (0);
}

static int
thread_update_uticks(void)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kse *ke = td->td_kse;
	struct kse_thr_mailbox *tmbx;
	caddr_t addr;
	uint uticks, sticks;

	KASSERT(!(td->td_flags & TDF_UNBOUND), ("thread not bound."));

	if (ke->ke_mailbox == NULL)
		return (0);

	uticks = ke->ke_uuticks;
	ke->ke_uuticks = 0;
	sticks = ke->ke_usticks;
	ke->ke_usticks = 0;
	tmbx = (void *)fuword((caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_curthread));
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (0);
	if (uticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox,
		    tm_uticks);
		uticks += fuword(addr);
		if (suword(addr, uticks))
			goto bad;
	}
	if (sticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox,
		    tm_sticks);
		sticks += fuword(addr);
		if (suword(addr, sticks))
			goto bad;
	}
	return (0);
bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (-1);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		KASSERT((ke->ke_bound != td),
		    ("thread_exit: entered with ke_bound set"));

		/*
		 * The reason for all this hoopla is
		 * an attempt to stop our thread stack from being freed
		 * until AFTER we have stopped running on it.
		 * Since we are under schedlock, almost any method where
		 * it is eventually freed by someone else is probably ok.
		 * (Especially if they do it under schedlock). We could
		 * almost free it here if we could be certain that
		 * the uma code wouldn't pull it apart immediately,
		 * but unfortunately we cannot guarantee that.
		 *
		 * For threads that are exiting and NOT killing their
		 * KSEs we can just stash it in the KSE, however
		 * in the case where the KSE is also being deallocated,
		 * we need to store it somewhere else. It turns out that
		 * we will never free the last KSE, so there is always one
		 * other KSE available. We might as well just choose one
		 * and stash it there. Being under schedlock should make that
		 * safe.
		 *
		 * In borrower threads, we can stash it in the lender,
		 * where it won't be needed until this thread is long gone.
		 * Borrower threads can't kill their KSE anyhow, so even
		 * the KSE would be a safe place for them. It is not
		 * necessary to have a KSE (or KSEGRP) at all beyond this
		 * point, while we are under the protection of schedlock.
		 *
		 * Either give the KSE to another thread to use (or make
		 * it idle), or free it entirely, possibly along with its
		 * ksegrp if it's the last one.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			/*
			 * Designate another KSE to hold our thread.
			 * Safe as long as we abide by whatever lock
			 * we control it with.. The other KSE will not
			 * be able to run it until we release the schedlock,
			 * but we need to be careful about it deciding to
			 * write to the stack before then. Luckily
			 * I believe that while another thread's
			 * standin thread can be used in this way, the
			 * spare thread for the KSE cannot be used without
			 * holding schedlock at least once.
			 */
			ke = FIRST_KSE_IN_PROC(p);
		} else {
			kse_reassign(ke);
		}
#if 0
		if (ke->ke_bound) {
			/*
			 * WE are a borrower..
			 * stash our thread with the owner.
			 */
			if (ke->ke_bound->td_standin) {
				thread_stash(ke->ke_bound->td_standin);
			}
			ke->ke_bound->td_standin = td;
		} else {
#endif
		if (ke->ke_tdspare != NULL) {
			thread_stash(ke->ke_tdspare);
			ke->ke_tdspare = NULL;
		}
		ke->ke_tdspare = td;
#if 0
		}
#endif
		PROC_UNLOCK(p);
		td->td_state = TDS_INACTIVE;
		td->td_proc = NULL;
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
	} else {
		PROC_UNLOCK(p);
	}

	cpu_throw();
	/* NOTREACHED */
}

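/*
 * Summarizing the lifetime rule the code above relies on: an exiting
 * thread's stack must stay intact until after cpu_throw() has switched
 * away from it, so the struct thread is never freed directly here.
 * It is parked in ke_tdspare (or on the zombie lists via the stash
 * functions) and is only returned to UMA later, by thread_reap(),
 * which runs from thread_alloc() in some other thread's context.
 */
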
/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	if (oiks_debug && (p->p_numthreads > oiks_max_threads_per_proc)) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_kse = NULL;
}

void
kse_purge(struct proc *p, struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			if (ke->ke_tdspare)
				thread_stash(ke->ke_tdspare);
			kse_stash(ke);
		}
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	struct ksegrp *kg;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the kse is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some in-kernel
	 * work. When we complete, the bound thread will have the chance to
	 * complete. This thread will sleep as planned. Hopefully there will
	 * eventually be an unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */
	if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
		return (NULL);
	}
	KASSERT((ke->ke_bound == NULL), ("kse already bound"));

	if (ke->ke_state == KES_IDLE) {
		kg = ke->ke_ksegrp;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
		ke->ke_state = KES_UNQUEUED;
	}
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING;	/* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/*
	 * If called from msleep(), we are working on the current
	 * KSE so fake that we borrowed it. If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace (whichever
		 * happens first). When that happens the KSE will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		ke->ke_bound = td2;
		TD_SET_LOAN(td2);
	} else {
		ke->ke_bound = NULL;
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that it has received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in sleep()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_UNBOUND;
			mtx_unlock_spin(&sched_lock);
		} else {
			/*
			 * When the thread limit is reached, act as though
			 * the thread has already done an upcall.
			 */
			if (p->p_numthreads > max_threads_per_proc) {
				if (td->td_standin != NULL)
					thread_stash(td->td_standin);
				td->td_standin = NULL;
			} else {
				if (td->td_standin == NULL)
					td->td_standin = thread_alloc();
			}
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_UNBOUND;
			mtx_unlock_spin(&sched_lock);
			td->td_usticks = 0;
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first). If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct proc *p;
	struct timespec ts;

	error = 0;

	unbound = td->td_flags & TDF_UNBOUND;

	kg = td->td_ksegrp;
	p = td->td_proc;

	/*
	 * Originally bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some Philanthropy.
	 * Unbound threads on the other hand either yield to other work
	 * or transform into an upcall.
	 * (having saved their context to user space in both cases)
	 */
	if (unbound) {
		/*
		 * We are an unbound thread, looking to return to
		 * user space.
		 * There are several possibilities:
		 * 1) we are using a borrowed KSE. save state and exit.
		 *    kse_reassign() will recycle the kse as needed,
		 * 2) we are not.. save state, and then convert ourself
		 *    to be an upcall, bound to the KSE.
		 *    if there are others that need the kse,
		 *    give them a chance by doing an mi_switch().
		 *    Because we are bound, control will eventually return
		 *    to us here.
		 * ***
		 * Save the thread's context, and link it
		 * into the KSEGRP's list of completed threads.
		 */
		error = thread_export_context(td);
		td->td_mailbox = NULL;
		td->td_usticks = 0;
		if (error) {
			/*
			 * If we are not running on a borrowed KSE, then
			 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
			 * the syscall. If it IS borrowed, there is nothing
			 * we can do. We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			if (td->td_kse->ke_bound == NULL) {
				td->td_flags &= ~TDF_UNBOUND;
				PROC_UNLOCK(td->td_proc);
				mtx_unlock_spin(&sched_lock);
				thread_update_uticks();
				return (error);	/* go sync */
			}
			thread_exit();
		}

		/*
		 * If the KSE is owned and we are borrowing it,
		 * don't make an upcall, just exit so that the owner
		 * can get its KSE if it wants it.
		 * Our context is already safely stored for later
		 * use by the UTS.
		 */
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (td->td_kse->ke_bound) {
			thread_exit();
		}
		PROC_UNLOCK(p);

		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 * We should look just like a scheduled upcall
		 * from msleep() or cv_wait().
		 */
		td->td_flags &= ~TDF_UNBOUND;
		td->td_flags |= TDF_UPCALLING;
		/* Only get here if we have become an upcall */

	} else {
		mtx_lock_spin(&sched_lock);
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * Check for threads that need to borrow it.
	 * Optimisation: don't call mi_switch if no-one wants the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;
	if ((td2 = kg->kg_last_assigned))
		td2 = TAILQ_NEXT(td2, td_runq);
	else
		td2 = TAILQ_FIRST(&kg->kg_runq);
	if (td2) {
		/*
		 * Force a switch to more urgent 'in kernel'
		 * work. Control will return to this thread
		 * when there is no more work to do.
		 * kse_reassign() will do that for us.
		 */
		TD_SET_LOAN(td);
		ke->ke_bound = td;
		ke->ke_thread = NULL;
		mi_switch();	/* kse_reassign() will (re)find td2 */
	}
	mtx_unlock_spin(&sched_lock);

	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL) {
		if (ke->ke_tdspare) {
			td->td_standin = ke->ke_tdspare;
			ke->ke_tdspare = NULL;
		} else {
			td->td_standin = thread_alloc();
		}
	}

	thread_update_uticks();
	/*
	 * To get here, we know there is no other need for our
	 * KSE so we can proceed. If not upcalling, go back to
	 * userspace. If we are, get the upcall set up.
	 */
	if ((td->td_flags & TDF_UPCALLING) == 0)
		return (0);

	/*
	 * We must be an upcall to get this far.
	 * There is no more work to do and we are going to ride
	 * this thread/KSE up to userland as an upcall.
	 * Do the last parts of the setup needed for the upcall.
	 */
	CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);

	/*
	 * Set user context to the UTS.
	 */
	cpu_set_upcall_kse(td, ke);

	/*
	 * Put any completed mailboxes on this KSE's list.
	 */
	error = thread_link_mboxes(kg, ke);
	if (error)
		goto bad;

	/*
	 * Set state and mailbox.
	 * From now on we are just a bound outgoing process.
	 * **Problem** userret is often called several times.
	 * It would be nice if this all happened only on the first time
	 * through. (the scan for extra work etc.)
	 */
	mtx_lock_spin(&sched_lock);
	td->td_flags &= ~TDF_UPCALLING;
	mtx_unlock_spin(&sched_lock);
#if 0
	error = suword((caddr_t)ke->ke_mailbox +
	    offsetof(struct kse_mailbox, km_curthread), 0);
#else	/* if user pointer arithmetic is ok in the kernel */
	error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
#endif
	ke->ke_uuticks = ke->ke_usticks = 0;
	if (!error) {
		nanotime(&ts);
		if (copyout(&ts, (caddr_t)&ke->ke_mailbox->km_timeofday,
		    sizeof(ts))) {
			goto bad;
		}
	}
	return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * How do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however
 * be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT)
		p->p_flag |= P_SINGLE_EXIT;
	else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* XXXKSE Which lock protects the below values? */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						if (td2->td_flags & TDF_CVWAITQ)
							cv_abort(td2);
						else
							abortsleep(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/* maybe other inhibited states too? */
					if (TD_IS_SLEEPING(td2))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1) {
			mtx_unlock_spin(&sched_lock);
			break;
		}

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	if (force_exit == SINGLE_EXIT)
		kse_purge(p, td);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			/*
			 * Free extra kses and ksegrps.  We needn't worry
			 * about whether the current thread is in the same
			 * ksegrp as p_singlethread and the last kse in the
			 * group could be killed; this is protected by
			 * kg_numthreads, since in that case kg_numthreads
			 * must be > 1.
			 */
			ke = td->td_kse;
			if (ke->ke_bound == NULL &&
			    ((kg->kg_kses != 1) || (kg->kg_numthreads == 1)))
				ke->ke_flags |= KEF_EXIT;
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release its KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
		mtx_unlock_spin(&sched_lock);
	}
}
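
/*
 * An illustrative sketch (not taken from any particular caller) of how
 * the single-threading interface above is meant to be driven.  A caller
 * that needs the process quiesced does, roughly:
 *
 *	PROC_LOCK(p);
 *	if (thread_single(force_exit)) {
 *		... some other thread already owns the single-threading
 *		... request; the caller must back out.
 *	} else {
 *		... all other threads are now suspended at the user
 *		... boundary (or have exited, if force_exit was
 *		... SINGLE_EXIT); do the work that required quiescence.
 *		thread_single_end();	(if the others were only suspended)
 *	}
 *	PROC_UNLOCK(p);
 *
 * Other threads notice the request in thread_suspend_check(), called on
 * their way back toward user mode.
 */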