/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);

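/*
 * The ctor/dtor/init/fini routines below are the UMA zone callbacks
 * registered by threadinit() when it creates the thread, kse and
 * ksegrp zones.
 */
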
/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * KSE is linked onto the idle queue.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_owner = NULL;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
}

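/*
 * Detach a KSE from its KSE group, reaping the group as well if this
 * was its last KSE, and stash the KSE on the zombie list for later
 * reaping.  Called with sched_lock held.
 */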
void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0) {
		ksegrp_unlink(kg);
	}
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc = p;
	/* the following counters are in the -zero- section and may not need clearing */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0;	/* XXXKSE change name */
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("kseg_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

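/*
 * Interrupt a specific thread of a KSE process: the thread whose user
 * mailbox matches uap->tmbx is flagged with TDF_INTERRUPT and any
 * interruptible sleep it is in is aborted.
 */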
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (uap->tmbx == NULL)
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;

	p = td->td_proc;
	/* Only UTS can do the syscall */
	if (!(p->p_flag & P_KSES) || (td->td_mailbox != NULL))
		return (EINVAL);
	kg = td->td_ksegrp;
	/* serialize killing kse */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		ke->ke_flags &= ~KEF_DOUPCALL;
		ke->ke_mailbox = NULL;
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		ke->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * THEN becomes an upcall. Only error cases return.
 */
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	kg = td->td_ksegrp;
	/*
	 * kse must have a mailbox ready for upcall, and only UTS can
	 * do the syscall.
	 */
	if (!(p->p_flag & P_KSES) ||
	    (td->td_mailbox != NULL) ||
	    (td->td_kse->ke_mailbox == NULL))
		return (EINVAL);

	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	/* Change OURSELF to become an upcall. */
	td->td_flags = TDF_UPCALLING;	/* BOUND */
	if (!(td->td_kse->ke_flags & (KEF_DOUPCALL|KEF_ASTPENDING)) &&
	    (kg->kg_completed == NULL)) {
		/*
		 * The KSE will however be lendable.
		 */
		TD_SET_IDLE(td);
		PROC_UNLOCK(p);
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
	} else {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	return (0);
}

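/*
 * Wake a KSE so that it will make an upcall.  With a mailbox argument,
 * find the KSE that owns that mailbox; otherwise pick any KSE in the
 * caller's group whose owner thread is idle.  An idle owner is made
 * runnable, any other owner is flagged to upcall as soon as possible.
 */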
/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return EINVAL;

	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke) {
				if (ke->ke_mailbox != uap->mbx)
					continue;
				td2 = ke->ke_owner;
				KASSERT((td2 != NULL), ("KSE with no owner"));
				break;
			}
			if (td2) {
				break;
			}
		}
	} else {
		/*
		 * Look for any idle KSE to resurrect.
		 */
		kg = td->td_ksegrp;
		FOREACH_KSE_IN_GROUP(kg, ke) {
			td2 = ke->ke_owner;
			KASSERT((td2 != NULL), ("KSE with no owner2"));
			if (TD_IS_IDLE(td2))
				break;
		}
		KASSERT((td2 != NULL), ("no thread(s)"));
	}
	if (td2) {
		if (TD_IS_IDLE(td2)) {
			TD_CLR_IDLE(td2);
			setrunnable(td2);
		} else if (td != td2) {
			/* guarantee an upcall ASAP */
			td2->td_kse->ke_flags |= KEF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall
 * All other situations, do allocate a new KSE and schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES;	/* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way.  It will
		 * ensure that this thread stays BOUND.  It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		    kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		    RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if we are not already at the limit, which
		 * is one per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
			if (thread_debug == 0) { /* if debugging, allow more */
#ifdef SMP
			if (kg->kg_kses > mp_ncpus)
#endif
				return (EPROCLIM);
			}
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
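	/*
	 * If we allocated a new KSE, link it into the (possibly new) group,
	 * attach the mailbox and upcall function and schedule an upcall on
	 * it.  Otherwise we are simply giving a mailbox to the KSE we are
	 * already bound to.
	 */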
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		    ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		    RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg) {
			if (p->p_numksegrps >= max_groups_per_proc) {
				mtx_unlock_spin(&sched_lock);
				ksegrp_free(newkg);
				kse_free(newke);
				return (EPROCLIM);
			}
			ksegrp_link(newkg, p);
		}
		else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new kse.
	 */
	return (0);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

	/*
	 * XXX this is declared in a MD include file, i386/include/ucontext.h but
	 * is used in MI code.
	 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

	/*
	 * XXX this is declared in a MD include file, i386/include/ucontext.h but
	 * is used in MI code.
	 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works.  Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
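		/*
		 * Now that everything is unhooked and the zombie lock has
		 * been dropped, free the dead structures at our leisure.
		 */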
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, 0));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, 0));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, 0));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox.
 * Then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;
	uint temp;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error)
		goto bad;

	thread_getcontext(td, &uc);
	error = copyout(&uc, addr, sizeof(ucontext_t));
	if (error)
		goto bad;

	/* get address in latest mbox of list pointer */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp))
		goto bad;
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 * It accumulates user/system ticks for the current KSE or thread;
 * they are later copied out to the relevant user mailbox.
 */
int
thread_add_ticks_intr(int user, uint ticks)
{
	struct thread *td = curthread;
	struct kse *ke = td->td_kse;

	if (ke->ke_mailbox == NULL)
		return -1;
	if (user) {
		/* Currently always done via ast() */
		ke->ke_flags |= KEF_ASTPENDING;
		ke->ke_uuticks += ticks;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks += ticks;
		else
			ke->ke_usticks += ticks;
	}
	return 0;
}

/*
 * Push the ticks accumulated on the current KSE out to the thread
 * mailbox it is currently running (km_curthread).  The process is
 * signalled with SIGSEGV if the mailbox cannot be written.
 */
static int
thread_update_uticks(void)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kse *ke = td->td_kse;
	struct kse_thr_mailbox *tmbx;
	caddr_t addr;
	uint uticks, sticks;

	if (ke->ke_mailbox == NULL)
		return 0;

	uticks = ke->ke_uuticks;
	ke->ke_uuticks = 0;
	sticks = ke->ke_usticks;
	ke->ke_usticks = 0;
#if 0
	tmbx = (void *)fuword((caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
	tmbx = (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return 0;
	if (uticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_uticks);
		uticks += fuword(addr);
		if (suword(addr, uticks))
			goto bad;
	}
	if (sticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_sticks);
		sticks += fuword(addr);
		if (suword(addr, sticks))
			goto bad;
	}
	return 0;
bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return -1;
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

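	/*
	 * Hand any spare upcall thread we are holding back to the zombie
	 * list; an exiting thread has no further use for it.
	 */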
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_state = KES_UNQUEUED;

		/*
		 * Decide what to do with the KSE attached to this thread.
		 * XXX Possibly kse_reassign should do both cases as it already
		 * does some of this.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			KASSERT((ke->ke_owner == td),
			    ("thread_exit: KSE exiting with non-owner thread"));
			ke->ke_thread = NULL;
			td->td_kse = NULL;
			kse_unlink(ke);
		} else {
			TD_SET_EXITING(td);	/* definitely not runnable */
			kse_reassign(ke);
		}
		PROC_UNLOCK(p);
		td->td_state = TDS_INACTIVE;
		td->td_proc = NULL;
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	cpu_throw();
	/* NOTREACHED */
}

/*
 * Do any thread specific cleanups that may be needed in wait()
 * called with Giant held, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	td->td_kse = NULL;
}

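/*
 * Clean out any KSE groups left over once a process has been reduced
 * to a single thread: every group other than the caller's is stashed
 * for reaping, and the caller's group is re-linked as the only one.
 */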
void
kse_purge(struct proc *p, struct thread *td)
{
	/*
	 * XXXKSE think about this..
	 * may need to wake up threads on loan queue.
	 */
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 * Use our thread's standin so that we don't have to allocate one.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the owner and kse are BOUND then that thread is planning to
	 * go to userland and upcalls are not expected.  So don't make one.
	 * If it is not bound then make it so with the spare thread
	 * and then borrow back the KSE to allow us to complete some in-kernel
	 * work.  When we complete, the Bound thread will have the chance to
	 * complete.  This thread will sleep as planned.  Hopefully there will
	 * eventually be an unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */

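	/*
	 * Use the spare thread we keep around for this purpose.  If it has
	 * already been consumed (and this is not a new KSE) there is nothing
	 * we can do, so no upcall is created.
	 */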
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING;	/* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	ke->ke_owner = td2;
	/*
	 * If called from kse_reassign(), we are working on the current
	 * KSE so fake that we borrowed it.  If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace, (whichever
		 * happens first).  When that happens the KSE will "revert"
		 * to this thread in a BOUND manner.  Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		TD_SET_LOAN(td2);
	} else {
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that it has received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in kse_reassign()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	td->td_flags &= ~TDF_UNBOUND;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~(TDF_UNBOUND|TDF_CAN_UNBIND);
			mtx_unlock_spin(&sched_lock);
		} else {
			/*
			 * When the thread limit is reached, act as though
			 * the thread has already done an upcall.
			 */
			if (p->p_numthreads > max_threads_per_proc) {
				if (td->td_standin != NULL) {
					thread_stash(td->td_standin);
					td->td_standin = NULL;
				}
			} else {
				if (td->td_standin == NULL)
					td->td_standin = thread_alloc();
			}
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_CAN_UNBIND;
			mtx_unlock_spin(&sched_lock);
			KASSERT((ke->ke_owner == td),
			    ("thread_user_enter: No starting owner "));
			ke->ke_owner = td;
			td->td_usticks = 0;
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish.  Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *worktodo;
	struct proc *p;
	struct timespec ts;

	KASSERT((td->td_kse && td->td_kse->ke_thread && td->td_kse->ke_owner),
	    ("thread_userret: bad thread/kse pointers"));
	KASSERT((td == curthread),
	    ("thread_userret: bad thread argument"));

	kg = td->td_ksegrp;
	p = td->td_proc;
	error = 0;
	unbound = TD_IS_UNBOUND(td);

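	/*
	 * Find the first thread on the ksegrp run queue that has not yet
	 * been assigned a KSE; if there is one, there is in-kernel work
	 * that may need this KSE before we go back to userland.
	 */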
	mtx_lock_spin(&sched_lock);
	if ((worktodo = kg->kg_last_assigned))
		worktodo = TAILQ_NEXT(worktodo, td_runq);
	else
		worktodo = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * Permanently bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some Philanthropy.
	 * Temporarily bound threads on the other hand either yield
	 * to other work and transform into an upcall, or proceed back to
	 * userland.
	 */

	if (TD_CAN_UNBIND(td)) {
		td->td_flags &= ~(TDF_UNBOUND|TDF_CAN_UNBIND);
		if (!worktodo && (kg->kg_completed == NULL) &&
		    !(td->td_kse->ke_flags & KEF_DOUPCALL)) {
			/*
			 * This thread has not started any upcall.
			 * If there is no work to report other than
			 * ourself, then it can return directly to userland.
			 */
justreturn:
			mtx_unlock_spin(&sched_lock);
			thread_update_uticks();
			td->td_mailbox = NULL;
			return (0);
		}
		mtx_unlock_spin(&sched_lock);
		error = thread_export_context(td);
		td->td_usticks = 0;
		if (error) {
			/*
			 * As we are not running on a borrowed KSE,
			 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
			 * the syscall.
			 */
			goto justreturn;
		}
		mtx_lock_spin(&sched_lock);
		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 */
		td->td_flags |= TDF_UPCALLING;

		/* there may be more work since we re-locked schedlock */
		if ((worktodo = kg->kg_last_assigned))
			worktodo = TAILQ_NEXT(worktodo, td_runq);
		else
			worktodo = TAILQ_FIRST(&kg->kg_runq);
	} else if (unbound) {
		/*
		 * We are an unbound thread, looking to
		 * return to user space. There must be another owner
		 * of this KSE.
		 * We are using a borrowed KSE.  Save state and exit.
		 * kse_reassign() will recycle the kse as needed.
		 */
		mtx_unlock_spin(&sched_lock);
		error = thread_export_context(td);
		td->td_usticks = 0;
		if (error) {
			/*
			 * There is nothing we can do.
			 * We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			ke = td->td_kse;
			/* possibly upcall with error? */
		} else {
			/*
			 * Don't make an upcall, just exit so that the owner
			 * can get its KSE if it wants it.
			 * Our context is already safely stored for later
			 * use by the UTS.
			 */
			PROC_LOCK(p);
			mtx_lock_spin(&sched_lock);
			ke = td->td_kse;
		}
		/*
		 * If the owner is idling, we now have something for it
		 * to report, so make it runnable.
		 * If the owner is not an upcall, make an attempt to
		 * ensure that at least one of any IDLED upcalls can
		 * wake up.
		 */
		if (ke->ke_owner->td_flags & TDF_UPCALLING) {
			TD_CLR_IDLE(ke->ke_owner);
		} else {
			FOREACH_KSE_IN_GROUP(kg, ke) {
				if (TD_IS_IDLE(ke->ke_owner)) {
					TD_CLR_IDLE(ke->ke_owner);
					setrunnable(ke->ke_owner);
					break;
				}
			}
		}
		thread_exit();
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * We are permanently bound. We may be an upcall.
	 * If an upcall, check for threads that need to borrow the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;

	/*
	 * If not upcalling, go back to userspace.
	 * If we are, get the upcall set up.
	 */
	if (td->td_flags & TDF_UPCALLING) {
		if (worktodo) {
			/*
			 * force a switch to more urgent 'in kernel'
			 * work. Control will return to this thread
			 * when there is no more work to do.
			 * kse_reassign() will do that for us.
			 */
			TD_SET_LOAN(td);
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch();	/* kse_reassign() will (re)find worktodo */
		}
		td->td_flags &= ~TDF_UPCALLING;
		if (ke->ke_flags & KEF_DOUPCALL)
			ke->ke_flags &= ~KEF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);

		/*
		 * There is no more work to do and we are going to ride
		 * this thread/KSE up to userland as an upcall.
		 * Do the last parts of the setup needed for the upcall.
		 */
		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);

		/*
		 * Set user context to the UTS.
		 * Will use Giant in cpu_thread_clean() because it uses
		 * kmem_free(kernel_map, ...)
		 */
		cpu_set_upcall_kse(td, ke);

		/*
		 * Unhook the list of completed threads.
		 * Anything that completes after this gets to
		 * come in next time.
		 * Put the list of completed thread mailboxes on
		 * this KSE's mailbox.
		 */
		error = thread_link_mboxes(kg, ke);
		if (error)
			goto bad;

		/*
		 * Set state and clear the thread mailbox pointer.
		 * From now on we are just a bound outgoing process.
		 * **Problem** userret is often called several times.
		 * It would be nice if this all happened only on the first
		 * time through. (the scan for extra work etc.)
		 */
#if 0
		error = suword((caddr_t)ke->ke_mailbox +
		    offsetof(struct kse_mailbox, km_curthread), 0);
#else	/* if user pointer arithmetic is ok in the kernel */
		error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
#endif
		ke->ke_uuticks = ke->ke_usticks = 0;
		if (error)
			goto bad;
		nanotime(&ts);
		if (copyout(&ts,
		    (caddr_t)&ke->ke_mailbox->km_timeofday, sizeof(ts))) {
			goto bad;
		}
	} else {
		mtx_unlock_spin(&sched_lock);
	}
	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL) {
		td->td_standin = thread_alloc();
	}

	thread_update_uticks();
	td->td_mailbox = NULL;
	return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * How do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	td->td_mailbox = NULL;
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		td->td_flags &= ~TDF_UNBOUND;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* XXXKSE Which lock protects the below values? */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						if (td2->td_flags & TDF_CVWAITQ)
							cv_abort(td2);
						else
							abortsleep(td2);
					}
					if (TD_IS_IDLE(td2)) {
						TD_CLR_IDLE(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/* maybe other inhibited states too? */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED |
					    TDI_LOAN | TDI_IDLE |
					    TDI_EXITING))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1) {
			mtx_unlock_spin(&sched_lock);
			break;
		}

		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	if (force_exit == SINGLE_EXIT)
		kse_purge(p, td);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			/*
			 * All threads should be exiting
			 * Unless they are the active "singlethread".
			 * destroy un-needed KSEs as we go..
			 * KSEGRPS may implode too as #kses -> 0.
			 */
			ke = td->td_kse;
			if (ke->ke_owner == td &&
			    (kg->kg_kses >= kg->kg_numthreads))
				ke->ke_flags |= KEF_EXIT;
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release its KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
		mtx_unlock_spin(&sched_lock);
	}
}