/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
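 * These UMA zones back the allocations of threads, KSEs, KSE groups and
 * upcall structures made below.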
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int virtual_cpu;

/*
 * RANGEOF() gives the size in bytes of the region of a structure that
 * starts at member 'start' and ends just before member 'end'; it is used
 * with bzero()/bcopy() on the 'zero' and 'copy' sections of structures.
 */
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td);
static int thread_update_sys_ticks(struct thread *td);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
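 * Unlike the ctor/dtor above, which run on every allocation and free from
 * the zone, this (and thread_fini below) only runs when an item moves
 * between the zone and its backing store.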
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its kse group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
	ke->ke_flags = 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	if (--kg->kg_kses == 0)
		ksegrp_unlink(kg);
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_runq_kses = 0; /* XXXKSE change name */
	kg->kg_idle_kses = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, 0);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = 0;
	}
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox * tmbx;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	if (!(p->p_flag & P_KSES) || (uap->tmbx == NULL))
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;

	p = td->td_proc;
	/*
	 * Only the UTS may call this syscall, and the
	 * current group must be a threaded group.
	 */
	if ((td->td_mailbox != NULL) || (td->td_ksegrp->kg_numupcalls == 0))
		return (EINVAL);
	KASSERT((td->td_upcall != NULL), ("%s: not own an upcall", __func__));

	kg = td->td_ksegrp;
	/* Serialize removing upcall */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_numupcalls == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ke = td->td_kse;
	upcall_remove(td);
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall, or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	register_t dummy;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	kg = td->td_ksegrp;
	/*
	 * Only the UTS may call this syscall, and the
	 * current group must be a threaded group.
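	 * (td_mailbox is NULL only when the UTS itself is running, and
	 * kg_numupcalls is nonzero only once the group has been set up
	 * for threading.)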
	 */
	if ((td->td_mailbox != NULL) || (td->td_ksegrp->kg_numupcalls == 0))
		return (EINVAL);
	KASSERT((td->td_upcall != NULL), ("%s: not own an upcall", __func__));

	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	/* Change OURSELF to become an upcall. */
	td->td_flags = TDF_UPCALLING;
	if ((td->td_upcall->ku_flags & KUF_DOUPCALL) == 0 &&
	    (kg->kg_completed == NULL)) {
		kg->kg_upsleeps++;
		mtx_unlock_spin(&sched_lock);
		msleep(&kg->kg_completed, &p->p_mtx, PPAUSE|PCATCH, "ksepause",
		    NULL);
		kg->kg_upsleeps--;
		PROC_UNLOCK(p);
	} else {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);

	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) &&
		    (td2->td_wchan == &kg->kg_completed)) {
			abortsleep(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * No new KSEGRP: on the first call, use the current KSE and don't schedule
 * an upcall. In all other situations, allocate the maximum number of new
 * KSEs and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad, why doesn't the kernel always have a cpu counter!? */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (thread_debug && virtual_cpu != 0)
		ncpus = virtual_cpu;

	/* Easier to just set it than to test and set */
	p->p_flag |= P_KSES;
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* There is a race condition here, but it is cheap. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		    kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		    RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		mtx_lock_spin(&sched_lock);
		ksegrp_link(newkg, p);
		if (p->p_numksegrps >= max_groups_per_proc) {
			ksegrp_unlink(newkg);
			mtx_unlock_spin(&sched_lock);
			return (EPROCLIM);
		}
		mtx_unlock_spin(&sched_lock);
	} else {
		newkg = kg;
	}

	/*
	 * Creating more upcalls than there are physical cpus does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group, optimized for MP.
		 * Create as many KSEs as there are physical cpus; this
		 * increases kernel concurrency even if userland is not
		 * MP safe and can only run on a single CPU (which is true
		 * for early versions of libpthread).
		 * In an ideal world, every physical cpu should execute
		 * a thread.  If there are enough KSEs, threads in the
		 * kernel can be executed in parallel on different cpus
		 * at full speed; kernel concurrency should not be
		 * restricted by the number of upcalls userland provides.
		 * Adding more upcall structures only increases
		 * concurrency in userland.
		 * The highest performance configuration is:
		 * N kses = N upcalls = N physical cpus
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			    ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			    RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			if (p->p_sflag & PS_NEEDSIGCHK)
				newke->ke_flags |= KEF_ASTPENDING;
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	mtx_lock_spin(&sched_lock);
	if (newkg->kg_numupcalls >= ncpus) {
		upcall_free(newku);
		mtx_unlock_spin(&sched_lock);
		return (EPROCLIM);
	}
	upcall_link(newku, newkg);

	/*
	 * Each upcall structure has an owner thread, find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread yet,
		 * create an initial upcall thread to own it.
		 */
		thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread doesn't own an upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			thread_schedule_upcall(td, newku);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works. Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */

void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, 0));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, 0));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap();	/* check if any zombies to get */
	return (uma_zalloc(thread_zone, 0));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error, temp;
	ucontext_t uc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	addr = (void *)(&td->td_mailbox->tm_context);
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error)
		goto bad;

	thread_getcontext(td, &uc);
	error = copyout(&uc, addr, sizeof(ucontext_t));
	if (error)
		goto bad;

	/* Export clock ticks spent in kernel mode. */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp))
		goto bad;

	/* Get address in latest mbox of list pointer */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads when we unlock the
			 * process lock.
			 * It is then no longer valid to use it
			 * anywhere else.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	/* The mailbox is bad, don't use it */
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (error);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * upcall's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

	addr = (void *)(&ku->ku_mailbox->km_completed);
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_statclock(int user)
{
	struct thread *td = curthread;

	if (td->td_ksegrp->kg_numupcalls == 0)
		return (-1);
	if (user) {
		/* Currently always done via ast() */
		td->td_kse->ke_flags |= KEF_ASTPENDING; /* XXX TDF_ASTPENDING */
		td->td_flags |= TDF_USTATCLOCK;
		td->td_uuticks++;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks++;
		else {
			/* XXXKSE
			 * We will call thread_user_enter() for every
			 * kernel entry in the future, so if the thread
			 * mailbox is NULL, it must be a UTS kernel
			 * thread; don't account clock ticks for it.
			 */
		}
	}
	return (0);
}

/*
 * Export user mode clock ticks.
 */
static int
thread_update_usr_ticks(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct kse_thr_mailbox *tmbx;
	struct kse_upcall *ku;
	caddr_t addr;
	uint uticks;

	if ((ku = td->td_upcall) == NULL)
		return (-1);

	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (-1);
	uticks = td->td_uuticks;
	td->td_uuticks = 0;
	if (uticks) {
		addr = (caddr_t)&tmbx->tm_uticks;
		uticks += fuword(addr);
		if (suword(addr, uticks)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (-2);
		}
	}
	return (0);
}

/*
 * Export kernel mode clock ticks.
 */

static int
thread_update_sys_ticks(struct thread *td)
{
	struct proc *p = td->td_proc;
	caddr_t addr;
	int sticks;

	if (td->td_mailbox == NULL)
		return (-1);
	if (td->td_usticks == 0)
		return (0);
	addr = (caddr_t)&td->td_mailbox->tm_sticks;
	sticks = fuword(addr);
	/* XXXKSE use XCHG instead */
	sticks += td->td_usticks;
	td->td_usticks = 0;
	if (suword(addr, sticks)) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (-2);
	}
	return (0);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;

		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and owner threads exit only when the process is
		 * exiting, the upcall to userland is no longer needed
		 * and it is safe to delete the upcall structure here.
		 * So once all threads in a group have exited, all
		 * upcalls in the group will have been freed
		 * automatically.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT)
			kse_unlink(ke);
		else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse = NULL;
		td->td_state = TDS_INACTIVE;
		td->td_proc = NULL;
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	cpu_throw();
	/* NOTREACHED */
}

/*
 * Do any thread specific cleanups that may be needed in wait();
 * called with Giant held, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;
	td->td_flags = 0;
	td->td_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

/*
 * Purge a ksegrp's resources. When a ksegrp is preparing to
 * exit, it calls this function.
 */
void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
		    ("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
	    ("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	    ("%s: ksegrp still has %d upcall datas",
	    __func__, kg->kg_numupcalls));
}

/*
 * Purge a process's KSE resources. When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * There is no ownership for KSEs; after all the threads
		 * in the group have exited, it is possible that some
		 * KSEs were left on the idle queue, so gc them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			    ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		    ("%s: ksegrp still has %d upcall datas",
		    __func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}

/*
 * This function is intended to be used to initialize a spare thread
 * for upcall. Initialize the thread's large data area outside sched_lock
 * for thread_schedule_upcall().
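 * The spare is later handed off while sched_lock (a spin mutex) is held,
 * so the allocation and bulk copying must happen before that point.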
 */
void
thread_alloc_spare(struct thread *td, struct thread *spare)
{
	if (td->td_standin)
		return;
	if (spare == NULL)
		spare = thread_alloc();
	td->td_standin = spare;
	bzero(&spare->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	spare->td_proc = td->td_proc;
	/* Setup PCB and fork address */
	cpu_set_upcall(spare, td->td_pcb);
	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, spare->td_frame, sizeof(struct trapframe));
	spare->td_ucred = crhold(td->td_ucred);
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 * Use our thread's standin so that we don't have to allocate one.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
{
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Schedule an upcall thread on the specified kse_upcall;
	 * the kse_upcall must be free.
	 * td must have a spare thread.
	 */
	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		panic("no reserve thread when scheduling an upcall");
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ku->ku_ksegrp);
	/* Let the new thread become owner of the upcall */
	ku->ku_owner = td2;
	td2->td_upcall = ku;
	td2->td_flags = TDF_UPCALLING;
	td2->td_kse = NULL;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	setrunqueue(td2);
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that it has received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
#if 0
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

#endif
	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);
#if 0
	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
#endif
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse_upcall *ku;

	kg = td->td_ksegrp;
	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in kse_reassign()),
	 * but for now do it every time.
	 */
	kg = td->td_ksegrp;
	if (kg->kg_numupcalls) {
		ku = td->td_upcall;
		KASSERT(ku, ("%s: no upcall owned", __func__));
		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
		td->td_mailbox =
		    (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			/* Don't schedule upcall when blocked */
			td->td_mailbox = NULL;
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_CAN_UNBIND;
			mtx_unlock_spin(&sched_lock);
		} else {
			if (p->p_numthreads > max_threads_per_proc) {
				/*
				 * Since the kernel thread limit has been
				 * reached, don't schedule upcalls anymore.
				 * XXXKSE This code isn't really needed.
				 */
				mtx_lock_spin(&sched_lock);
				td->td_flags &= ~TDF_CAN_UNBIND;
				mtx_unlock_spin(&sched_lock);
			} else {
				if (td->td_standin == NULL)
					thread_alloc_spare(td, NULL);
				mtx_lock_spin(&sched_lock);
				td->td_flags |= TDF_CAN_UNBIND;
				mtx_unlock_spin(&sched_lock);
			}
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first). If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	struct kse_upcall *ku;
	struct ksegrp *kg;
	struct proc *p;
	struct timespec ts;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Nothing to do with non-threaded group/process */
	if (td->td_ksegrp->kg_numupcalls == 0)
		return (0);

	/*
	 * The stat clock interrupt hit in userland and we are
	 * returning from the interrupt; charge the thread's
	 * userland time to the UTS.
	 */
	if (td->td_flags & TDF_USTATCLOCK) {
		thread_update_usr_ticks(td);
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_USTATCLOCK;
		mtx_unlock_spin(&sched_lock);
	}

	/*
	 * Optimisation:
	 * This thread has not started any upcall.
	 * If there is no work to report other than ourselves,
	 * then it can return directly to userland.
	 */
	if (TD_CAN_UNBIND(td)) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_CAN_UNBIND;
		mtx_unlock_spin(&sched_lock);
		if ((kg->kg_completed == NULL) &&
		    (td->td_upcall->ku_flags & KUF_DOUPCALL) == 0) {
			thread_update_sys_ticks(td);
			td->td_mailbox = NULL;
			return (0);
		}
		error = thread_export_context(td);
		if (error) {
			/*
			 * Failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return
			 * from the syscall.
			 */
			return (0);
		}
		/*
		 * There is something to report, and we own an upcall
		 * structure, so we can go to userland.
		 * Turn ourself into an upcall thread.
		 */
		mtx_lock_spin(&sched_lock);
		td->td_flags |= TDF_UPCALLING;
		mtx_unlock_spin(&sched_lock);
	} else if (td->td_mailbox) {
		error = thread_export_context(td);
		if (error) {
			PROC_LOCK(td->td_proc);
			mtx_lock_spin(&sched_lock);
			/* possibly upcall with error? */
		} else {
			PROC_LOCK(td->td_proc);
			mtx_lock_spin(&sched_lock);
			/*
			 * There are upcall threads waiting for
			 * work to do, wake one of them up.
			 * XXXKSE Maybe wake all of them up.
			 */
			if (kg->kg_upsleeps)
				wakeup_one(&kg->kg_completed);
		}
		thread_exit();
		/* NOTREACHED */
	}

	if (td->td_flags & TDF_UPCALLING) {
		KASSERT(TD_CAN_UNBIND(td) == 0, ("upcall thread can unbind"));
		ku = td->td_upcall;
		/*
		 * There is no more work to do and we are going to ride
		 * this thread up to userland as an upcall.
		 * Do the last parts of the setup needed for the upcall.
		 */
		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);

		/*
		 * Set user context to the UTS.
		 * Will use Giant in cpu_thread_clean() because it uses
		 * kmem_free(kernel_map, ...)
		 */
		cpu_set_upcall_kse(td, ku);

		/*
		 * Clear TDF_UPCALLING after setting the upcall context;
		 * the profiling code checks TDF_UPCALLING to avoid
		 * accounting a bogus user %EIP.
		 */
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_UPCALLING;
		if (ku->ku_flags & KUF_DOUPCALL)
			ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);

		/*
		 * Unhook the list of completed threads.
		 * Anything that completes after this gets to
		 * come in next time.
		 * Put the list of completed thread mailboxes on
		 * this KSE's mailbox.
		 */
		error = thread_link_mboxes(kg, ku);
		if (error)
			goto bad;

		/*
		 * Set state and clear the thread mailbox pointer.
		 * From now on we are just a bound outgoing process.
		 * **Problem** userret is often called several times.
		 * It would be nice if this all happened only on the first
		 * time through. (the scan for extra work etc.)
		 */
		error = suword((caddr_t)&ku->ku_mailbox->km_curthread, 0);
		if (error)
			goto bad;

		/* Export current system time */
		nanotime(&ts);
		if (copyout(&ts,
		    (caddr_t)&ku->ku_mailbox->km_timeofday, sizeof(ts))) {
			goto bad;
		}
	}
	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	/*
	 * Clear the thread mailbox first, then clear the system tick count.
	 * The order is important because thread_statclock() uses the
	 * mailbox pointer to see if it is a userland thread or a
	 * UTS kernel thread.
	 */
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * How do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* XXXKSE Which lock protects the below values? */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						if (td2->td_flags & TDF_CVWAITQ)
							cv_abort(td2);
						else
							abortsleep(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1) {
			mtx_unlock_spin(&sched_lock);
			break;
		}

		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall) {
			mtx_lock_spin(&sched_lock);
			upcall_remove(td);
			mtx_unlock_spin(&sched_lock);
		}
		kse_purge(p, td);
	}
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
		mtx_unlock_spin(&sched_lock);
	}
}