/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 * UMA zones backing the three KSE-related allocations made in this file
 * (created in threadinit(), used by *_alloc()/*_free() below).
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
/* Debug knob consulted in thread_link(): 0 disable, 1 printf, 2 enter debugger */
static int oiks_debug = 1;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

/* Soft limit checked (not enforced) in thread_link() when oiks_debug is set. */
static int max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, max_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

/* Byte distance between two members of a struct, used for bzero/bcopy spans. */
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

/*
 * Queue of threads stashed for later freeing (see thread_stash()/
 * thread_reap()), protected by the spin lock below.
 */
struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

/*
 * Prepare a thread for use.
 *
 * UMA constructor for thread_zone: runs each time a thread structure is
 * handed out by uma_zalloc().  Marks the thread inactive and unbound.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 *
 * UMA destructor for thread_zone: runs when a thread structure is returned
 * via uma_zfree().  Under INVARIANTS, verifies the thread is inactive.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 *
 * UMA init hook: runs once per backing allocation, not on every zalloc.
 * Allocates the kernel stack (pmap_new_thread) and does MD setup.
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	/* NOTE(review): Giant is taken around pmap_new_thread() only —
	 * presumably it is not yet safe to call without it; confirm. */
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 * UMA fini hook: releases the resources set up in thread_init().
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 * Note: on non-i386 only the signal mask is filled in; the machine
 * context is left untouched.
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 * Returns 0 on success, ENOSYS on platforms without set_mcontext(), or
 * an error from set_mcontext().  Only on success is the signal mask
 * installed (with uncatchable signals stripped via SIG_CANTMASK).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 * Creates the thread, ksegrp and kse UMA zones used by the allocators
 * below.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmallloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works. Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sizeof (struct ksegrp),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sizeof (struct kse),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
246 */ 247 void 248 thread_stash(struct thread *td) 249 { 250 mtx_lock_spin(&zombie_thread_lock); 251 TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq); 252 mtx_unlock_spin(&zombie_thread_lock); 253 } 254 255 /* 256 * Reap zombie threads. 257 */ 258 void 259 thread_reap(void) 260 { 261 struct thread *td_reaped; 262 263 /* 264 * don't even bother to lock if none at this instant 265 * We really don't care about the next instant.. 266 */ 267 if (!TAILQ_EMPTY(&zombie_threads)) { 268 mtx_lock_spin(&zombie_thread_lock); 269 while (!TAILQ_EMPTY(&zombie_threads)) { 270 td_reaped = TAILQ_FIRST(&zombie_threads); 271 TAILQ_REMOVE(&zombie_threads, td_reaped, td_runq); 272 mtx_unlock_spin(&zombie_thread_lock); 273 thread_free(td_reaped); 274 mtx_lock_spin(&zombie_thread_lock); 275 } 276 mtx_unlock_spin(&zombie_thread_lock); 277 } 278 } 279 280 /* 281 * Allocate a ksegrp. 282 */ 283 struct ksegrp * 284 ksegrp_alloc(void) 285 { 286 return (uma_zalloc(ksegrp_zone, M_WAITOK)); 287 } 288 289 /* 290 * Allocate a kse. 291 */ 292 struct kse * 293 kse_alloc(void) 294 { 295 return (uma_zalloc(kse_zone, M_WAITOK)); 296 } 297 298 /* 299 * Allocate a thread. 300 */ 301 struct thread * 302 thread_alloc(void) 303 { 304 thread_reap(); /* check if any zombies to get */ 305 return (uma_zalloc(thread_zone, M_WAITOK)); 306 } 307 308 /* 309 * Deallocate a ksegrp. 310 */ 311 void 312 ksegrp_free(struct ksegrp *td) 313 { 314 uma_zfree(ksegrp_zone, td); 315 } 316 317 /* 318 * Deallocate a kse. 319 */ 320 void 321 kse_free(struct kse *td) 322 { 323 uma_zfree(kse_zone, td); 324 } 325 326 /* 327 * Deallocate a thread. 328 */ 329 void 330 thread_free(struct thread *td) 331 { 332 uma_zfree(thread_zone, td); 333 } 334 335 /* 336 * Store the thread context in the UTS's mailbox. 337 * then add the mailbox at the head of a list we are building in user space. 338 * The list is anchored in the ksegrp structure. 
 *
 * On any copy failure the process is sent SIGSEGV and a non-zero error is
 * returned; on success the thread's mailbox becomes the new head of
 * kg_completed and 0 is returned.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	/*
	 * Read the user's context, overlay the current kernel-side state
	 * (thread_getcontext) and write the combined result back out.
	 */
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error == 0) {
		thread_getcontext(td, &uc);
		error = copyout(&uc, addr, sizeof(ucontext_t));

	}
	if (error) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (error);
	}
	/* get address in latest mbox of list pointer */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox , tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 *
	 * Optimistic-retry loop: suword() may sleep, so kg_completed is
	 * sampled unlocked, written to user space, then re-checked under
	 * the proc lock; if it changed in the meantime we loop and redo
	 * the user-space store.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 *
 * Publishes kg->kg_completed into the user-visible km_completed field of
 * the KSE's mailbox, then clears kg_completed.  Returns 0 on success or
 * EFAULT (after SIGSEGV) if the user-space store fails.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	/*
	 * Same optimistic-retry pattern as thread_export_context():
	 * sample kg_completed unlocked, store it to user space (suword can
	 * sleep), then confirm under the proc lock that it did not change.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 *
 * Called with sched_lock and the proc lock held; never returns
 * (cpu_throw).  The last thread of a process is left attached so the
 * whole proc/thread bundle is recycled together.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	/* Push any spare threads we hold onto the zombie queue. */
	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * So that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/* Unlink this thread from its proc. and the kseg */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SNGL is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		if (ke->ke_bound == td) {
			printf("thread_exit: entered with ke_bound set\n");
			ke->ke_bound = NULL; /* should never happen */
		}

		kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_state	= TDS_INACTIVE;
		td->td_proc	= NULL;
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
		/*
		 * For now stash this here, however
		 * it's not a permanent solution.
		 * When we want to make KSEs exit as well
		 * we'll have to face this one again.
		 * Where will we hide it then?
		 *
		 * In borrower threads, stash it in the lender
		 * Where it won't be needed until
		 * this thread is long gone.
		 */
		if (ke->ke_bound) {
			if (ke->ke_bound->td_standin) {
				thread_stash(ke->ke_bound->td_standin);
			}
			ke->ke_bound->td_standin = td;
		} else {
			ke->ke_tdspare = td;
		}
	} else {
		PROC_UNLOCK(p);
	}

	cpu_throw();
	/* NOTREACHED */
}

/*
 * Link a thread to a process.
 * set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc	= p;
	td->td_ksegrp	= kg;
	td->td_last_kse	= NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	/* Debug aid: complain (or drop to debugger) past the soft limit. */
	if (oiks_debug && p->p_numthreads > max_threads_per_proc) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_kse	= NULL;
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 *
 * Called with sched_lock held.  Consumes td->td_standin as the upcall
 * thread.  Returns the new thread, or NULL when no upcall could be
 * scheduled (KSE owned by a bound thread, or no standin available).
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	/* A "new" kse is one other than the caller's current KSE. */
	newkse = (ke != td->td_kse);

	/*
	 * If the kse is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some in-kernel
	 * work. When we complete, the Bound thread will have the chance to
	 * complete. This thread will sleep as planned. Hopefully there will
	 * eventually be un unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */
	if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
		return (NULL);
	}
	KASSERT((ke->ke_bound == NULL), ("kse already bound"));

	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return(NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
	/*
	 * Initialise the new thread from the template ranges of the
	 * current one (zeroed span, then copied span).
	 */
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING;	/* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/*
	 * If called from msleep(), we are working on the current
	 * KSE so fake that we borrowed it. If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace, (which ever
		 * happens first). When that happens the KSe will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		ke->ke_bound = td2;
		TD_SET_LOAN(td2);
	} else {
		ke->ke_bound = NULL;
		ke->ke_thread = td2;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process recieved signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 *
 * NOTE(review): the unconditional "return (NULL)" below deliberately
 * disables this function — everything after it is dead code, apparently
 * kept for reference ("Bogus JRE").
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first). If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct proc *p;

	error = 0;

	unbound = td->td_flags & TDF_UNBOUND;

	kg = td->td_ksegrp;
	p = td->td_proc;

	/*
	 * Originally bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some Philantropy.
	 * Unbound threads on the other hand either yield to other work
	 * or transform into an upcall.
	 * (having saved their context to user space in both cases)
	 */
	if (unbound ) {
		/*
		 * We are an unbound thread, looking to return to
		 * user space.
		 * THere are several possibilities:
		 * 1) we are using a borrowed KSE. save state and exit.
		 *    kse_reassign() will recycle the kse as needed,
		 * 2) we are not.. save state, and then convert ourself
		 *    to be an upcall, bound to the KSE.
		 *    if there are others that need the kse,
		 *    give them a chance by doing an mi_switch().
		 *    Because we are bound, control will eventually return
		 *    to us here.
		 * ***
		 * Save the thread's context, and link it
		 * into the KSEGRP's list of completed threads.
		 */
		error = thread_export_context(td);
		td->td_mailbox = NULL;
		if (error) {
			/*
			 * If we are not running on a borrowed KSE, then
			 * failing to do the KSE operation just defaults
			 * back to synchonous operation, so just return from
			 * the syscall. If it IS borrowed, there is nothing
			 * we can do. We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			if (td->td_kse->ke_bound == NULL) {
				td->td_flags &= ~TDF_UNBOUND;
				PROC_UNLOCK(td->td_proc);
				mtx_unlock_spin(&sched_lock);
				return (error);	/* go sync */
			}
			/* Borrowed KSE: does not return. */
			thread_exit();
		}

		/*
		 * if the KSE is owned and we are borrowing it,
		 * don't make an upcall, just exit so that the owner
		 * can get its KSE if it wants it.
		 * Our context is already safely stored for later
		 * use by the UTS.
		 */
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (td->td_kse->ke_bound) {
			thread_exit();
		}
		PROC_UNLOCK(p);

		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 * We should look just like a sheduled upcall
		 * from msleep() or cv_wait().
		 */
		td->td_flags &= ~TDF_UNBOUND;
		td->td_flags |= TDF_UPCALLING;
		/* Only get here if we have become an upcall */
		/* sched_lock is still held from above. */

	} else {
		mtx_lock_spin(&sched_lock);
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * Check for threads that need to borrow it.
	 * Optimisation: don't call mi_switch if no-one wants the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;
	if ((td2 = kg->kg_last_assigned))
		td2 = TAILQ_NEXT(td2, td_runq);
	else
		td2 = TAILQ_FIRST(&kg->kg_runq);
	if (td2) {
		/*
		 * force a switch to more urgent 'in kernel'
		 * work. Control will return to this thread
		 * when there is no more work to do.
		 * kse_reassign() will do that for us.
		 */
		TD_SET_LOAN(td);
		ke->ke_bound = td;
		ke->ke_thread = NULL;
		mi_switch(); /* kse_reassign() will (re)find td2 */
	}
	mtx_unlock_spin(&sched_lock);

	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL) {
		if (ke->ke_tdspare) {
			td->td_standin = ke->ke_tdspare;
			ke->ke_tdspare = NULL;
		} else {
			td->td_standin = thread_alloc();
		}
	}

	/*
	 * To get here, we know there is no other need for our
	 * KSE so we can proceed. If not upcalling, go back to
	 * userspace. If we are, get the upcall set up.
	 */
	if ((td->td_flags & TDF_UPCALLING) == 0)
		return (0);

	/*
	 * We must be an upcall to get this far.
	 * There is no more work to do and we are going to ride
	 * this thead/KSE up to userland as an upcall.
	 * Do the last parts of the setup needed for the upcall.
	 */
	CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);

	/*
	 * Set user context to the UTS.
	 */
	cpu_set_upcall_kse(td, ke);

	/*
	 * Put any completed mailboxes on this KSE's list.
	 */
	error = thread_link_mboxes(kg, ke);
	if (error)
		goto bad;

	/*
	 * Set state and mailbox.
	 * From now on we are just a bound outgoing process.
	 * **Problem** userret is often called several times.
	 * it would be nice if this all happenned only on the first time
	 * through. (the scan for extra work etc.)
	 */
	td->td_flags &= ~TDF_UPCALLING;
#if 0
	error = suword((caddr_t)ke->ke_mailbox +
	    offsetof(struct kse_mailbox, km_curthread), 0);
#else	/* if user pointer arithmetic is ok in the kernel */
	error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
#endif
	if (!error)
		return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * how do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * There are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accellerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptable. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	/* Non-KSE processes are single-threaded by definition. */
	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT)
		p->p_flag |= P_SINGLE_EXIT;
	else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* Loop until we are the only non-suspended thread. */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (TD_IS_SUSPENDED(td2)) {
					if (force_exit == SINGLE_EXIT) {
						thread_unsuspend_one(td2);
					}
				}
				if ( TD_IS_SLEEPING(td2)) {
					if (td2->td_flags & TDF_CVWAITQ)
						cv_waitq_remove(td2);
					else
						unsleep(td2);
					break;
				}
				if (TD_CAN_RUN(td2))
					setrunqueue(td2);
			}
		}
		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			/* Drop all Giant recursion before exiting. */
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the processes's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release it's KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_lock_spin(&sched_lock);
		/*
		 * If we are the last thread to stop for a SIGSTOP-style
		 * stop, notify the parent with SIGCHLD (unless it set
		 * PS_NOCLDSTOP).
		 */
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		/* If our suspension completes a single-threading
		 * request, let the single threader run. */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

/*
 * Move a thread to the process's suspended queue and account for it.
 * Called with sched_lock held.
 */
void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

/*
 * Take a thread off the process's suspended queue and make it runnable.
 * Called with sched_lock held.
 */
void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
1122 */ 1123 void 1124 thread_unsuspend(struct proc *p) 1125 { 1126 struct thread *td; 1127 1128 mtx_assert(&sched_lock, MA_OWNED); 1129 PROC_LOCK_ASSERT(p, MA_OWNED); 1130 if (!P_SHOULDSTOP(p)) { 1131 while (( td = TAILQ_FIRST(&p->p_suspended))) { 1132 thread_unsuspend_one(td); 1133 } 1134 } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) && 1135 (p->p_numthreads == p->p_suspcount)) { 1136 /* 1137 * Stopping everything also did the job for the single 1138 * threading request. Now we've downgraded to single-threaded, 1139 * let it continue. 1140 */ 1141 thread_unsuspend_one(p->p_singlethread); 1142 } 1143 } 1144 1145 void 1146 thread_single_end(void) 1147 { 1148 struct thread *td; 1149 struct proc *p; 1150 1151 td = curthread; 1152 p = td->td_proc; 1153 PROC_LOCK_ASSERT(p, MA_OWNED); 1154 p->p_flag &= ~P_STOPPED_SINGLE; 1155 p->p_singlethread = NULL; 1156 /* 1157 * If there are other threads they mey now run, 1158 * unless of course there is a blanket 'stop order' 1159 * on the process. The single threader must be allowed 1160 * to continue however as this is a bad place to stop. 1161 */ 1162 if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) { 1163 mtx_lock_spin(&sched_lock); 1164 while (( td = TAILQ_FIRST(&p->p_suspended))) { 1165 thread_unsuspend_one(td); 1166 } 1167 mtx_unlock_spin(&sched_lock); 1168 } 1169 } 1170 1171 1172