/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/signal.h>
#include <sys/errno.h>
#include <sys/vmparam.h>
#include <sys/stack.h>
#include <sys/procfs.h>
#include <sys/prsystm.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/door.h>
#include <vm/seg_kp.h>
#include <sys/debug.h>
#include <sys/tnf.h>
#include <sys/schedctl.h>
#include <sys/poll.h>
#include <sys/copyops.h>
#include <sys/lwp_upimutex_impl.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/cpc_impl.h>
#include <sys/sdt.h>
#include <sys/cmn_err.h>
#include <sys/brand.h>
#include <sys/cyclic.h>
#include <sys/pool.h>

/* hash function for the lwpid hash table, p->p_tidhash[] */
#define	TIDHASH(tid, hash_sz)	((tid) & ((hash_sz) - 1))

void *segkp_lwp;		/* cookie for pool of segkp resources */
extern void reapq_move_lq_to_tq(kthread_t *);
extern void freectx_ctx(struct ctxop *);

/*
 * Create a thread that appears to be stopped at sys_rtt.
 */
klwp_t *
lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
    int state, int pri, const k_sigset_t *smask, int cid, id_t lwpid)
{
	klwp_t *lwp = NULL;
	kthread_t *t;
	kthread_t *tx;
	cpupart_t *oldpart = NULL;
	size_t stksize;
	caddr_t lwpdata = NULL;
	processorid_t binding;
	int err = 0;
	kproject_t *oldkpj, *newkpj;
	void *bufp = NULL;
	klwp_t *curlwp = ttolwp(curthread);
	lwpent_t *lep;
	lwpdir_t *old_dir = NULL;
	uint_t old_dirsz = 0;
	tidhash_t *old_hash = NULL;
	uint_t old_hashsz = 0;
	ret_tidhash_t *ret_tidhash = NULL;
	int i;
	int rctlfail = 0;
	boolean_t branded = 0;
	struct ctxop *ctx = NULL;

	mutex_enter(&p->p_lock);
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	/*
	 * don't enforce rctl limits on system processes
	 */
	if (cid != syscid) {
		if (p->p_task->tk_nlwps >= p->p_task->tk_nlwps_ctl)
			if (rctl_test(rc_task_lwps, p->p_task->tk_rctls, p,
			    1, 0) & RCT_DENY)
				rctlfail = 1;
		if (p->p_task->tk_proj->kpj_nlwps >=
		    p->p_task->tk_proj->kpj_nlwps_ctl)
			if (rctl_test(rc_project_nlwps,
			    p->p_task->tk_proj->kpj_rctls, p, 1, 0)
			    & RCT_DENY)
				rctlfail = 1;
		if (p->p_zone->zone_nlwps >= p->p_zone->zone_nlwps_ctl)
			if (rctl_test(rc_zone_nlwps, p->p_zone->zone_rctls, p,
			    1, 0) & RCT_DENY)
				rctlfail = 1;
	}
	if (rctlfail) {
		mutex_exit(&p->p_zone->zone_nlwps_lock);
		mutex_exit(&p->p_lock);
		return (NULL);
	}
	p->p_task->tk_nlwps++;
	p->p_task->tk_proj->kpj_nlwps++;
	p->p_zone->zone_nlwps++;
	mutex_exit(&p->p_zone->zone_nlwps_lock);
	mutex_exit(&p->p_lock);

	if (curlwp == NULL || (stksize = curlwp->lwp_childstksz) == 0)
		stksize = lwp_default_stksize;

	/*
	 * Try to reclaim a <lwp,stack> from 'deathrow'
	 */
	if (stksize == lwp_default_stksize) {
		if (lwp_reapcnt > 0) {
			mutex_enter(&reaplock);
			if ((t = lwp_deathrow) != NULL) {
				ASSERT(t->t_swap);
				lwp_deathrow = t->t_forw;
				lwp_reapcnt--;
				lwpdata = t->t_swap;
				lwp = t->t_lwp;
				ctx = t->t_ctx;
				t->t_swap = NULL;
				t->t_lwp = NULL;
				t->t_ctx = NULL;
				reapq_move_lq_to_tq(t);
			}
			mutex_exit(&reaplock);
			if (lwp != NULL) {
				lwp_stk_fini(lwp);
			}
			if (ctx != NULL) {
				freectx_ctx(ctx);
			}
		}
		if (lwpdata == NULL &&
		    (lwpdata = (caddr_t)segkp_cache_get(segkp_lwp)) == NULL) {
			mutex_enter(&p->p_lock);
			mutex_enter(&p->p_zone->zone_nlwps_lock);
			p->p_task->tk_nlwps--;
			p->p_task->tk_proj->kpj_nlwps--;
			p->p_zone->zone_nlwps--;
			mutex_exit(&p->p_zone->zone_nlwps_lock);
			mutex_exit(&p->p_lock);
			return (NULL);
		}
	} else {
		stksize = roundup(stksize, PAGESIZE);
		if ((lwpdata = (caddr_t)segkp_get(segkp, stksize,
		    (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED))) == NULL) {
			mutex_enter(&p->p_lock);
			mutex_enter(&p->p_zone->zone_nlwps_lock);
			p->p_task->tk_nlwps--;
			p->p_task->tk_proj->kpj_nlwps--;
			p->p_zone->zone_nlwps--;
			mutex_exit(&p->p_zone->zone_nlwps_lock);
			mutex_exit(&p->p_lock);
			return (NULL);
		}
	}

	/*
	 * Create a thread, initializing the stack pointer
	 */
	t = thread_create(lwpdata, stksize, NULL, NULL, 0, p, TS_STOPPED, pri);

	t->t_swap = lwpdata;	/* Start of page-able data */
	if (lwp == NULL)
		lwp = kmem_cache_alloc(lwp_cache, KM_SLEEP);
	bzero(lwp, sizeof (*lwp));
	t->t_lwp = lwp;

	t->t_hold = *smask;
	lwp->lwp_thread = t;
	lwp->lwp_procp = p;
	lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;
	if (curlwp != NULL && curlwp->lwp_childstksz != 0)
		lwp->lwp_childstksz = curlwp->lwp_childstksz;

	t->t_stk = lwp_stk_init(lwp, t->t_stk);
	thread_load(t, proc, arg, len);

	/*
	 * Allocate the SIGPROF buffer if ITIMER_REALPROF is in effect.
	 */
	if (p->p_rprof_cyclic != CYCLIC_NONE)
		t->t_rprof = kmem_zalloc(sizeof (struct rprof), KM_SLEEP);

	if (cid != NOCLASS)
		(void) CL_ALLOC(&bufp, cid, KM_SLEEP);

	/*
	 * Allocate an lwp directory entry for the new lwp.
	 */
	lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);

	mutex_enter(&p->p_lock);
grow:
	/*
	 * Grow the lwp (thread) directory and lwpid hash table if necessary.
	 * A note on the growth algorithm:
	 *	The new lwp directory size is computed as:
	 *		new = 2 * old + 2
	 *	Starting with an initial size of 2 (see exec_common()),
	 *	this yields numbers that are a power of two minus 2:
	 *		2, 6, 14, 30, 62, 126, 254, 510, 1022, ...
	 *	The size of the lwpid hash table must be a power of two
	 *	and must be commensurate in size with the lwp directory
	 *	so that hash bucket chains remain short.  Therefore,
	 *	the lwpid hash table size is computed as:
	 *		hashsz = (dirsz + 2) / 2
	 *	which leads to these hash table sizes corresponding to
	 *	the above directory sizes:
	 *		2, 4, 8, 16, 32, 64, 128, 256, 512, ...
	 * A note on growing the hash table:
	 *	For performance reasons, code in lwp_unpark() does not
	 *	acquire curproc->p_lock when searching the hash table.
	 *	Rather, it calls lwp_hash_lookup_and_lock() which
	 *	acquires only the individual hash bucket lock, taking
	 *	care to deal with reallocation of the hash table
	 *	during the time it takes to acquire the lock.
	 *
	 *	This is sufficient to protect the integrity of the
	 *	hash table, but it requires us to acquire all of the
	 *	old hash bucket locks before growing the hash table
	 *	and to release them afterwards.  It also requires us
	 *	not to free the old hash table because some thread
	 *	in lwp_hash_lookup_and_lock() might still be trying
	 *	to acquire the old bucket lock.
	 *
	 *	So we adopt the tactic of keeping all of the retired
	 *	hash tables on a linked list, so they can be safely
	 *	freed when the process exits or execs.
	 *
	 *	Because the hash table grows in powers of two, the
	 *	total size of all of the hash tables will be slightly
	 *	less than twice the size of the largest hash table.
	 */
	while (p->p_lwpfree == NULL) {
		uint_t dirsz = p->p_lwpdir_sz;
		lwpdir_t *new_dir;
		uint_t new_dirsz;
		lwpdir_t *ldp;
		tidhash_t *new_hash;
		uint_t new_hashsz;

		mutex_exit(&p->p_lock);

		/*
		 * Prepare to remember the old p_tidhash for later
		 * kmem_free()ing when the process exits or execs.
		 */
		if (ret_tidhash == NULL)
			ret_tidhash = kmem_zalloc(sizeof (ret_tidhash_t),
			    KM_SLEEP);
		if (old_dir != NULL)
			kmem_free(old_dir, old_dirsz * sizeof (*old_dir));
		if (old_hash != NULL)
			kmem_free(old_hash, old_hashsz * sizeof (*old_hash));

		new_dirsz = 2 * dirsz + 2;
		new_dir = kmem_zalloc(new_dirsz * sizeof (lwpdir_t), KM_SLEEP);
		for (ldp = new_dir, i = 1; i < new_dirsz; i++, ldp++)
			ldp->ld_next = ldp + 1;
		new_hashsz = (new_dirsz + 2) / 2;
		new_hash = kmem_zalloc(new_hashsz * sizeof (tidhash_t),
		    KM_SLEEP);

		mutex_enter(&p->p_lock);
		if (p == curproc)
			prbarrier(p);

		if (dirsz != p->p_lwpdir_sz || p->p_lwpfree != NULL) {
			/*
			 * Someone else beat us to it or some lwp exited.
			 * Set up to free our memory and take a lap.
			 */
			old_dir = new_dir;
			old_dirsz = new_dirsz;
			old_hash = new_hash;
			old_hashsz = new_hashsz;
		} else {
			/*
			 * For the benefit of lwp_hash_lookup_and_lock(),
			 * called from lwp_unpark(), which searches the
			 * tid hash table without acquiring p->p_lock,
			 * we must acquire all of the tid hash table
			 * locks before replacing p->p_tidhash.
			 */
			old_hash = p->p_tidhash;
			old_hashsz = p->p_tidhash_sz;
			for (i = 0; i < old_hashsz; i++) {
				mutex_enter(&old_hash[i].th_lock);
				mutex_enter(&new_hash[i].th_lock);
			}

			/*
			 * We simply hash in all of the old directory entries.
			 * This works because the old directory has no empty
			 * slots and the new hash table starts out empty.
			 * This reproduces the original directory ordering
			 * (required for /proc directory semantics).
			 */
			old_dir = p->p_lwpdir;
			old_dirsz = p->p_lwpdir_sz;
			p->p_lwpdir = new_dir;
			p->p_lwpfree = new_dir;
			p->p_lwpdir_sz = new_dirsz;
			for (ldp = old_dir, i = 0; i < old_dirsz; i++, ldp++)
				lwp_hash_in(p, ldp->ld_entry,
				    new_hash, new_hashsz, 0);

			/*
			 * Remember the old hash table along with all
			 * of the previously-remembered hash tables.
			 * We will free them at process exit or exec.
			 */
			ret_tidhash->rth_tidhash = old_hash;
			ret_tidhash->rth_tidhash_sz = old_hashsz;
			ret_tidhash->rth_next = p->p_ret_tidhash;
			p->p_ret_tidhash = ret_tidhash;

			/*
			 * Now establish the new tid hash table.
			 * As soon as we assign p->p_tidhash,
			 * code in lwp_unpark() can start using it.
			 */
			membar_producer();
			p->p_tidhash = new_hash;

			/*
			 * It is necessary that p_tidhash reach global
			 * visibility before p_tidhash_sz.  Otherwise,
			 * code in lwp_hash_lookup_and_lock() could
			 * index into the old p_tidhash using the new
			 * p_tidhash_sz and thereby access invalid data.
			 */
			membar_producer();
			p->p_tidhash_sz = new_hashsz;

			/*
			 * Release the locks; allow lwp_unpark() to carry on.
			 */
			for (i = 0; i < old_hashsz; i++) {
				mutex_exit(&old_hash[i].th_lock);
				mutex_exit(&new_hash[i].th_lock);
			}

			/*
			 * Avoid freeing these objects below.
			 */
			ret_tidhash = NULL;
			old_hash = NULL;
			old_hashsz = 0;
		}
	}

	/*
	 * Block the process against /proc while we manipulate p->p_tlist,
	 * unless lwp_create() was called by /proc for the PCAGENT operation.
	 * We want to do this early enough so that we don't drop p->p_lock
	 * until the thread is put on the p->p_tlist.
	 */
	if (p == curproc) {
		prbarrier(p);
		/*
		 * If the current lwp has been requested to stop, do so now.
		 * Otherwise we have a race condition between /proc attempting
		 * to stop the process and this thread creating a new lwp
		 * that was not seen when the /proc PCSTOP request was issued.
		 * We rely on stop() to call prbarrier(p) before returning.
		 */
		while ((curthread->t_proc_flag & TP_PRSTOP) &&
		    !ttolwp(curthread)->lwp_nostop) {
			/*
			 * We called pool_barrier_enter() before calling
			 * here to lwp_create(). We have to call
			 * pool_barrier_exit() before stopping.
			 */
			pool_barrier_exit();
			prbarrier(p);
			stop(PR_REQUESTED, 0);
			/*
			 * And we have to repeat the call to
			 * pool_barrier_enter after stopping.
			 */
			pool_barrier_enter();
			prbarrier(p);
		}

		/*
		 * If the process is exiting, there could be a race between
		 * the agent lwp creation and the new lwp currently being
		 * created.  To prevent this race, lwp creation fails if
		 * the process is exiting.
		 */
		if (p->p_flag & (SEXITLWPS|SKILLED)) {
			err = 1;
			goto error;
		}

		/*
		 * Since we might have dropped p->p_lock, the
		 * lwp directory free list might have changed.
		 */
		if (p->p_lwpfree == NULL)
			goto grow;
	}

	kpreempt_disable();	/* can't grab cpu_lock here */

	/*
	 * Inherit processor and processor set bindings from curthread,
	 * unless we're creating a new kernel process, in which case
	 * clear all bindings.
	 */
	if (cid == syscid) {
		t->t_bind_cpu = binding = PBIND_NONE;
		t->t_cpupart = oldpart = &cp_default;
		t->t_bind_pset = PS_NONE;
		t->t_bindflag = (uchar_t)default_binding_mode;
	} else {
		binding = curthread->t_bind_cpu;
		t->t_bind_cpu = binding;
		oldpart = t->t_cpupart;
		t->t_cpupart = curthread->t_cpupart;
		t->t_bind_pset = curthread->t_bind_pset;
		t->t_bindflag = curthread->t_bindflag |
		    (uchar_t)default_binding_mode;
	}

	/*
	 * thread_create() initializes this thread's home lgroup to the root.
	 * Choose a more suitable lgroup, since this thread is associated
	 * with an lwp.
	 */
	ASSERT(oldpart != NULL);
	if (binding != PBIND_NONE && t->t_affinitycnt == 0) {
		t->t_bound_cpu = cpu[binding];
		if (t->t_lpl != t->t_bound_cpu->cpu_lpl)
			lgrp_move_thread(t, t->t_bound_cpu->cpu_lpl, 1);
	} else {
		lgrp_move_thread(t, lgrp_choose(t, t->t_cpupart), 1);
	}

	kpreempt_enable();

	/*
	 * make sure lpl points to our own partition
	 */
	ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads);
	ASSERT(t->t_lpl < t->t_cpupart->cp_lgrploads +
	    t->t_cpupart->cp_nlgrploads);

	/*
	 * If we're creating a new process, then inherit the project from our
	 * parent. If we're only creating an additional lwp then use the
	 * project pointer of the target process.
	 */
	if (p->p_task == NULL)
		newkpj = ttoproj(curthread);
	else
		newkpj = p->p_task->tk_proj;

	/*
	 * It is safe to point the thread to the new project without holding it
	 * since we're holding the target process' p_lock here and therefore
	 * we're guaranteed that it will not move to another project.
	 */
	oldkpj = ttoproj(t);
	if (newkpj != oldkpj) {
		t->t_proj = newkpj;
		(void) project_hold(newkpj);
		project_rele(oldkpj);
	}

	if (cid != NOCLASS) {
		/*
		 * If the lwp is being created in the current process
		 * and matches the current thread's scheduling class,
		 * we should propagate the current thread's scheduling
		 * parameters by calling CL_FORK.  Otherwise just use
		 * the defaults by calling CL_ENTERCLASS.
		 */
		if (p != curproc || curthread->t_cid != cid) {
			err = CL_ENTERCLASS(t, cid, NULL, NULL, bufp);
			t->t_pri = pri;	/* CL_ENTERCLASS may have changed it */
			/*
			 * We don't call schedctl_set_cidpri(t) here
			 * because the schedctl data is not yet set
			 * up for the newly-created lwp.
			 */
		} else {
			t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
			err = CL_FORK(curthread, t, bufp);
			t->t_cid = cid;
		}
		if (err)
			goto error;
		else
			bufp = NULL;
	}

	/*
	 * If we were given an lwpid then use it, else allocate one.
	 */
	if (lwpid != 0)
		t->t_tid = lwpid;
	else {
		/*
		 * lwp/thread id 0 is never valid; reserved for special checks.
		 * lwp/thread id 1 is reserved for the main thread.
		 * Start again at 2 when INT_MAX has been reached
		 * (id_t is a signed 32-bit integer).
		 */
		id_t prev_id = p->p_lwpid;	/* last allocated tid */

		do {			/* avoid lwpid duplication */
			if (p->p_lwpid == INT_MAX) {
				p->p_flag |= SLWPWRAP;
				p->p_lwpid = 1;
			}
			if ((t->t_tid = ++p->p_lwpid) == prev_id) {
				/*
				 * All lwpids are allocated; fail the request.
				 */
				err = 1;
				goto error;
			}
			/*
			 * We only need to worry about colliding with an id
			 * that's already in use if this process has
			 * cycled through all available lwp ids.
			 */
			if ((p->p_flag & SLWPWRAP) == 0)
				break;
		} while (lwp_hash_lookup(p, t->t_tid) != NULL);
	}

	/*
	 * If this is a branded process, let the brand do any necessary lwp
	 * initialization.
	 */
	if (PROC_IS_BRANDED(p)) {
		if (BROP(p)->b_initlwp(lwp)) {
			err = 1;
			goto error;
		}
		branded = 1;
	}

	if (t->t_tid == 1) {
		kpreempt_disable();
		ASSERT(t->t_lpl != NULL);
		p->p_t1_lgrpid = t->t_lpl->lpl_lgrpid;
		kpreempt_enable();
		if (p->p_tr_lgrpid != LGRP_NONE &&
		    p->p_tr_lgrpid != p->p_t1_lgrpid) {
			lgrp_update_trthr_migrations(1);
		}
	}

	p->p_lwpcnt++;
	t->t_waitfor = -1;

	/*
	 * Turn microstate accounting on for thread if on for process.
	 */
	if (p->p_flag & SMSACCT)
		t->t_proc_flag |= TP_MSACCT;

	/*
	 * If the process has watchpoints, mark the new thread as such.
	 */
	if (pr_watch_active(p))
		watch_enable(t);

	/*
	 * The lwp is being created in the stopped state.
	 * We set all the necessary flags to indicate that fact here.
	 * We omit the TS_CREATE flag from t_schedflag so that the lwp
	 * cannot be set running until the caller is finished with it,
	 * even if lwp_continue() is called on it after we drop p->p_lock.
	 * When the caller is finished with the newly-created lwp,
	 * the caller must call lwp_create_done() to allow the lwp
	 * to be set running.  If the TP_HOLDLWP is left set, the
	 * lwp will suspend itself after reaching system call exit.
	 */
	init_mstate(t, LMS_STOPPED);
	t->t_proc_flag |= TP_HOLDLWP;
	t->t_schedflag |= (TS_ALLSTART & ~(TS_CSTART | TS_CREATE));
	t->t_whystop = PR_SUSPENDED;
	t->t_whatstop = SUSPEND_NORMAL;
	t->t_sig_check = 1;	/* ensure that TP_HOLDLWP is honored */

	/*
	 * Set system call processing flags in case tracing or profiling
	 * is set.  The first system call will evaluate these and turn
	 * them off if they aren't needed.
	 */
	t->t_pre_sys = 1;
	t->t_post_sys = 1;

	/*
	 * Insert the new thread into the list of all threads.
	 */
	if ((tx = p->p_tlist) == NULL) {
		t->t_back = t;
		t->t_forw = t;
		p->p_tlist = t;
	} else {
		t->t_forw = tx;
		t->t_back = tx->t_back;
		tx->t_back->t_forw = t;
		tx->t_back = t;
	}

	/*
	 * Insert the new lwp into an lwp directory slot position
	 * and into the lwpid hash table.
	 */
	lep->le_thread = t;
	lep->le_lwpid = t->t_tid;
	lep->le_start = t->t_start;
	lwp_hash_in(p, lep, p->p_tidhash, p->p_tidhash_sz, 1);

	if (state == TS_RUN) {
		/*
		 * We set the new lwp running immediately.
		 */
		t->t_proc_flag &= ~TP_HOLDLWP;
		lwp_create_done(t);
	}

error:
	if (err) {
		/*
		 * We have failed to create an lwp, so decrement the number
		 * of lwps in the task and let the lgroup load averages know
		 * that this thread isn't going to show up.
		 */
		kpreempt_disable();
		lgrp_move_thread(t, NULL, 1);
		kpreempt_enable();

		ASSERT(MUTEX_HELD(&p->p_lock));
		mutex_enter(&p->p_zone->zone_nlwps_lock);
		p->p_task->tk_nlwps--;
		p->p_task->tk_proj->kpj_nlwps--;
		p->p_zone->zone_nlwps--;
		mutex_exit(&p->p_zone->zone_nlwps_lock);
		if (cid != NOCLASS && bufp != NULL)
			CL_FREE(cid, bufp);

		if (branded)
			BROP(p)->b_freelwp(lwp);

		mutex_exit(&p->p_lock);
		t->t_state = TS_FREE;
		thread_rele(t);

		/*
		 * We need to remove t from the list of all threads
		 * because thread_exit()/lwp_exit() isn't called on t.
		 */
		mutex_enter(&pidlock);
		ASSERT(t != t->t_next);		/* t0 never exits */
		t->t_next->t_prev = t->t_prev;
		t->t_prev->t_next = t->t_next;
		mutex_exit(&pidlock);

		thread_free(t);
		kmem_free(lep, sizeof (*lep));
		lwp = NULL;
	} else {
		mutex_exit(&p->p_lock);
	}

	if (old_dir != NULL)
		kmem_free(old_dir, old_dirsz * sizeof (*old_dir));
	if (old_hash != NULL)
		kmem_free(old_hash, old_hashsz * sizeof (*old_hash));
	if (ret_tidhash != NULL)
		kmem_free(ret_tidhash, sizeof (ret_tidhash_t));

	DTRACE_PROC1(lwp__create, kthread_t *, t);
	return (lwp);
}

/*
 * lwp_create_done() is called by the caller of lwp_create() to set the
 * newly-created lwp running after the caller has finished manipulating it.
 */
void
lwp_create_done(kthread_t *t)
{
	proc_t *p = ttoproc(t);

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * We set the TS_CREATE and TS_CSTART flags and call setrun_locked().
	 * (The absence of the TS_CREATE flag prevents the lwp from running
	 * until we are finished with it, even if lwp_continue() is called on
	 * it by some other lwp in the process or elsewhere in the kernel.)
	 */
	thread_lock(t);
	ASSERT(t->t_state == TS_STOPPED && !(t->t_schedflag & TS_CREATE));
	/*
	 * If TS_CSTART is set, lwp_continue(t) has been called and
	 * has already incremented p_lwprcnt; avoid doing this twice.
	 */
	if (!(t->t_schedflag & TS_CSTART))
		p->p_lwprcnt++;
	t->t_schedflag |= (TS_CSTART | TS_CREATE);
	setrun_locked(t);
	thread_unlock(t);
}

/*
 * Copy an LWP's active templates, and clear the latest contracts.
 */
void
lwp_ctmpl_copy(klwp_t *dst, klwp_t *src)
{
	int i;

	for (i = 0; i < ct_ntypes; i++) {
		dst->lwp_ct_active[i] = ctmpl_dup(src->lwp_ct_active[i]);
		dst->lwp_ct_latest[i] = NULL;
	}
}

/*
 * Clear an LWP's contract template state.
 */
void
lwp_ctmpl_clear(klwp_t *lwp)
{
	ct_template_t *tmpl;
	int i;

	for (i = 0; i < ct_ntypes; i++) {
		if ((tmpl = lwp->lwp_ct_active[i]) != NULL) {
			ctmpl_free(tmpl);
			lwp->lwp_ct_active[i] = NULL;
		}

		if (lwp->lwp_ct_latest[i] != NULL) {
			contract_rele(lwp->lwp_ct_latest[i]);
			lwp->lwp_ct_latest[i] = NULL;
		}
	}
}

/*
 * Individual lwp exit.
 * If this is the last lwp, exit the whole process.
 */
void
lwp_exit(void)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);

	ASSERT(MUTEX_HELD(&p->p_lock));

	mutex_exit(&p->p_lock);

#if defined(__sparc)
	/*
	 * Ensure that the user stack is fully abandoned.
	 */
	trash_user_windows();
#endif

	tsd_exit();		/* free thread specific data */

	kcpc_passivate();	/* Clean up performance counter state */

	pollcleanup();

	if (t->t_door)
		door_slam();

	if (t->t_schedctl != NULL)
		schedctl_lwp_cleanup(t);

	if (t->t_upimutex != NULL)
		upimutex_cleanup();

	/*
	 * Perform any brand specific exit processing, then release any
	 * brand data associated with the lwp
	 */
	if (PROC_IS_BRANDED(p))
		BROP(p)->b_lwpexit(lwp);

	mutex_enter(&p->p_lock);
	lwp_cleanup();

	/*
	 * When this process is dumping core, its lwps are held here
	 * until the core dump is finished. Then exitlwps() is called
	 * again to release these lwps so that they can finish exiting.
	 */
	if (p->p_flag & SCOREDUMP)
		stop(PR_SUSPENDED, SUSPEND_NORMAL);

	/*
	 * Block the process against /proc now that we have really acquired
	 * p->p_lock (to decrement p_lwpcnt and manipulate p_tlist at least).
	 */
	prbarrier(p);

	/*
	 * Call proc_exit() if this is the last non-daemon lwp in the process.
	 */
	if (!(t->t_proc_flag & TP_DAEMON) &&
	    p->p_lwpcnt == p->p_lwpdaemon + 1) {
		mutex_exit(&p->p_lock);
		if (proc_exit(CLD_EXITED, 0) == 0) {
			/* Restarting init. */
			return;
		}

		/*
		 * proc_exit() returns a non-zero value when some other
		 * lwp got there first.  We just have to continue in
		 * lwp_exit().
		 */
		mutex_enter(&p->p_lock);
		ASSERT(curproc->p_flag & SEXITLWPS);
		prbarrier(p);
	}

	DTRACE_PROC(lwp__exit);

	/*
	 * If the lwp is a detached lwp or if the process is exiting,
	 * remove (lwp_hash_out()) the lwp from the lwp directory.
	 * Otherwise null out the lwp's le_thread pointer in the lwp
	 * directory so that other threads will see it as a zombie lwp.
	 */
	prlwpexit(t);		/* notify /proc */
	if (!(t->t_proc_flag & TP_TWAIT) || (p->p_flag & SEXITLWPS))
		lwp_hash_out(p, t->t_tid);
	else {
		ASSERT(!(t->t_proc_flag & TP_DAEMON));
		p->p_lwpdir[t->t_dslot].ld_entry->le_thread = NULL;
		p->p_zombcnt++;
		cv_broadcast(&p->p_lwpexit);
	}
	if (t->t_proc_flag & TP_DAEMON) {
		p->p_lwpdaemon--;
		t->t_proc_flag &= ~TP_DAEMON;
	}
	t->t_proc_flag &= ~TP_TWAIT;

	/*
	 * Maintain accurate lwp count for task.max-lwps resource control.
	 */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	p->p_task->tk_nlwps--;
	p->p_task->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	CL_EXIT(t);		/* tell the scheduler that t is exiting */
	ASSERT(p->p_lwpcnt != 0);
	p->p_lwpcnt--;

	/*
	 * If all remaining non-daemon lwps are waiting in lwp_wait(),
	 * wake them up so someone can return EDEADLK.
	 * (See the block comment preceding lwp_wait().)
	 */
	if (p->p_lwpcnt == p->p_lwpdaemon + (p->p_lwpwait - p->p_lwpdwait))
		cv_broadcast(&p->p_lwpexit);

	t->t_proc_flag |= TP_LWPEXIT;
	term_mstate(t);

#ifndef NPROBE
	/* Kernel probe */
	if (t->t_tnf_tpdp)
		tnf_thread_exit();
#endif /* NPROBE */

	t->t_forw->t_back = t->t_back;
	t->t_back->t_forw = t->t_forw;
	if (t == p->p_tlist)
		p->p_tlist = t->t_forw;

	/*
	 * Clean up the signal state.
	 */
	if (t->t_sigqueue != NULL)
		sigdelq(p, t, 0);
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	thread_rele(t);

	/*
	 * Terminated lwps are associated with process zero and are put onto
	 * death-row by resume().  Avoid preemption after resetting t->t_procp.
	 */
	t->t_preempt++;

	if (t->t_ctx != NULL)
		exitctx(t);
	if (p->p_pctx != NULL)
		exitpctx(p);

	t->t_procp = &p0;

	/*
	 * Notify the HAT about the change of address space
	 */
	hat_thread_exit(t);
	/*
	 * When this is the last running lwp in this process and some lwp is
	 * waiting for this condition to become true, or this thread was being
	 * suspended, then the waiting lwp is awakened.
	 *
	 * Also, if the process is exiting, we may have a thread waiting in
	 * exitlwps() that needs to be notified.
	 */
	if (--p->p_lwprcnt == 0 || (t->t_proc_flag & TP_HOLDLWP) ||
	    (p->p_flag & SEXITLWPS))
		cv_broadcast(&p->p_holdlwps);

	/*
	 * Need to drop p_lock so we can reacquire pidlock.
	 */
	mutex_exit(&p->p_lock);
	mutex_enter(&pidlock);

	ASSERT(t != t->t_next);		/* t0 never exits */
	t->t_next->t_prev = t->t_prev;
	t->t_prev->t_next = t->t_next;
	cv_broadcast(&t->t_joincv);	/* wake up anyone in thread_join */
	mutex_exit(&pidlock);

	lwp_pcb_exit();

	t->t_state = TS_ZOMB;
	swtch_from_zombie();
	/* never returns */
}


/*
 * Cleanup function for an exiting lwp.
 * Called both from lwp_exit() and from proc_exit().
 * p->p_lock is repeatedly released and grabbed in this function.
 */
void
lwp_cleanup(void)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* untimeout any lwp-bound realtime timers */
	if (p->p_itimer != NULL)
		timer_lwpexit();

	/*
	 * If this is the /proc agent lwp that is exiting, readjust p_lwpid
	 * so it appears that the agent never existed, and clear p_agenttp.
	 */
	if (t == p->p_agenttp) {
		ASSERT(t->t_tid == p->p_lwpid);
		p->p_lwpid--;
		p->p_agenttp = NULL;
	}

	/*
	 * Do lgroup bookkeeping to account for thread exiting.
	 */
	kpreempt_disable();
	lgrp_move_thread(t, NULL, 1);
	if (t->t_tid == 1) {
		p->p_t1_lgrpid = LGRP_NONE;
	}
	kpreempt_enable();

	lwp_ctmpl_clear(ttolwp(t));
}

int
lwp_suspend(kthread_t *t)
{
	int tid;
	proc_t *p = ttoproc(t);

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Set the thread's TP_HOLDLWP flag so it will stop in holdlwp().
	 * If an lwp is stopping itself, there is no need to wait.
	 */
top:
	t->t_proc_flag |= TP_HOLDLWP;
	if (t == curthread) {
		t->t_sig_check = 1;
	} else {
		/*
		 * Make sure the lwp stops promptly.
		 */
		thread_lock(t);
		t->t_sig_check = 1;
		/*
		 * XXX Should use virtual stop like /proc does instead of
		 * XXX waking the thread to get it to stop.
		 */
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			setrun_locked(t);
		} else if (t->t_state == TS_ONPROC && t->t_cpu != CPU) {
			poke_cpu(t->t_cpu->cpu_id);
		}

		tid = t->t_tid;		/* remember thread ID */
		/*
		 * Wait for lwp to stop
		 */
		while (!SUSPENDED(t)) {
			/*
			 * Drop the thread lock before waiting and reacquire it
			 * afterwards, so the thread can change its t_state
			 * field.
			 */
			thread_unlock(t);

			/*
			 * Check if aborted by exitlwps().
			 */
			if (p->p_flag & SEXITLWPS)
				lwp_exit();

			/*
			 * Cooperate with jobcontrol signals and /proc stopping
			 * by calling cv_wait_sig() to wait for the target
			 * lwp to stop.  Just using cv_wait() can lead to
			 * deadlock because, if some other lwp has stopped
			 * by either of these mechanisms, then p_lwprcnt will
			 * never become zero if we do a cv_wait().
			 */
			if (!cv_wait_sig(&p->p_holdlwps, &p->p_lock))
				return (EINTR);

			/*
			 * Check to see if thread died while we were
			 * waiting for it to suspend.
			 */
			if (idtot(p, tid) == NULL)
				return (ESRCH);

			thread_lock(t);
			/*
			 * If the TP_HOLDLWP flag went away, lwp_continue()
			 * or vfork() must have been called while we were
			 * waiting, so start over again.
			 */
			if ((t->t_proc_flag & TP_HOLDLWP) == 0) {
				thread_unlock(t);
				goto top;
			}
		}
		thread_unlock(t);
	}
	return (0);
}

/*
 * continue a lwp that's been stopped by lwp_suspend().
 */
void
lwp_continue(kthread_t *t)
{
	proc_t *p = ttoproc(t);
	int was_suspended = t->t_proc_flag & TP_HOLDLWP;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t->t_proc_flag &= ~TP_HOLDLWP;
	thread_lock(t);
	if (SUSPENDED(t) &&
	    !(p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH))) {
		p->p_lwprcnt++;
		t->t_schedflag |= TS_CSTART;
		setrun_locked(t);
	}
	thread_unlock(t);
	/*
	 * Wakeup anyone waiting for this thread to be suspended
	 */
	if (was_suspended)
		cv_broadcast(&p->p_holdlwps);
}

/*
 * ********************************
 *	Miscellaneous lwp routines	*
 * ********************************
 */
/*
 * When a process is undergoing a forkall(), its p_flag is set to SHOLDFORK.
 * This will cause the process's lwps to stop at a hold point.  A hold
 * point is where a kernel thread has a flat stack.  This is at the
 * return from a system call and at the return from a user level trap.
 *
 * When a process is undergoing a fork1() or vfork(), its p_flag is set to
 * SHOLDFORK1.
 * This will cause the process's lwps to stop at a modified
 * hold point.  The lwps in the process are not being cloned, so they
 * are held at the usual hold points and also within issig_forreal().
 * This has the side-effect that their system calls do not return
 * showing EINTR.
 *
 * An lwp can also be held.  This is identified by the TP_HOLDLWP flag on
 * the thread.  The TP_HOLDLWP flag is set in lwp_suspend(), where the active
 * lwp is waiting for the target lwp to be stopped.
 */
void
holdlwp(void)
{
	proc_t *p = curproc;
	kthread_t *t = curthread;

	mutex_enter(&p->p_lock);
	/*
	 * Don't terminate immediately if the process is dumping core.
	 * Once the process has dumped core, all lwps are terminated.
	 */
	if (!(p->p_flag & SCOREDUMP)) {
		if ((p->p_flag & SEXITLWPS) || (t->t_proc_flag & TP_EXITLWP))
			lwp_exit();
	}
	if (!(ISHOLD(p)) && !(p->p_flag & (SHOLDFORK1 | SHOLDWATCH))) {
		mutex_exit(&p->p_lock);
		return;
	}
	/*
	 * stop() decrements p->p_lwprcnt and cv_signal()s &p->p_holdlwps
	 * when p->p_lwprcnt becomes zero.
	 */
	stop(PR_SUSPENDED, SUSPEND_NORMAL);
	if (p->p_flag & SEXITLWPS)
		lwp_exit();
	mutex_exit(&p->p_lock);
}

/*
 * Have all lwps within the process hold at a point where they are
 * cloneable (SHOLDFORK) or just safe w.r.t. fork1 (SHOLDFORK1).
 */
int
holdlwps(int holdflag)
{
	proc_t *p = curproc;

	ASSERT(holdflag == SHOLDFORK || holdflag == SHOLDFORK1);
	mutex_enter(&p->p_lock);
	schedctl_finish_sigblock(curthread);
again:
	while (p->p_flag & (SEXITLWPS | SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) {
		/*
		 * If another lwp is doing a forkall() or proc_exit(), bail out.
		 */
		if (p->p_flag & (SEXITLWPS | SHOLDFORK)) {
			mutex_exit(&p->p_lock);
			return (0);
		}
		/*
		 * Another lwp is doing a fork1() or is undergoing
		 * watchpoint activity.  We hold here for it to complete.
		 */
		stop(PR_SUSPENDED, SUSPEND_NORMAL);
	}
	p->p_flag |= holdflag;
	pokelwps(p);
	--p->p_lwprcnt;
	/*
	 * Wait for the process to become quiescent (p->p_lwprcnt == 0).
	 */
	while (p->p_lwprcnt > 0) {
		/*
		 * Check if aborted by exitlwps().
		 * Also check if SHOLDWATCH is set; it takes precedence.
		 */
		if (p->p_flag & (SEXITLWPS | SHOLDWATCH)) {
			p->p_lwprcnt++;
			p->p_flag &= ~holdflag;
			cv_broadcast(&p->p_holdlwps);
			goto again;
		}
		/*
		 * Cooperate with jobcontrol signals and /proc stopping.
		 * If some other lwp has stopped by either of these
		 * mechanisms, then p_lwprcnt will never become zero
		 * and the process will appear deadlocked unless we
		 * stop here in sympathy with the other lwp before
		 * doing the cv_wait() below.
		 *
		 * If the other lwp stops after we do the cv_wait(), it
		 * will wake us up to loop around and do the sympathy stop.
		 *
		 * Since stop() drops p->p_lock, we must start from
		 * the top again on returning from stop().
		 */
		if (p->p_stopsig | (curthread->t_proc_flag & TP_PRSTOP)) {
			int whystop = p->p_stopsig? PR_JOBCONTROL :
			    PR_REQUESTED;
			p->p_lwprcnt++;
			p->p_flag &= ~holdflag;
			stop(whystop, p->p_stopsig);
			goto again;
		}
		cv_wait(&p->p_holdlwps, &p->p_lock);
	}
	p->p_lwprcnt++;
	p->p_flag &= ~holdflag;
	mutex_exit(&p->p_lock);
	return (1);
}

/*
 * See comments for holdwatch(), below.
 */
static int
holdcheck(int clearflags)
{
	proc_t *p = curproc;

	/*
	 * If we are trying to exit, that takes precedence over anything else.
	 */
	if (p->p_flag & SEXITLWPS) {
		p->p_lwprcnt++;
		p->p_flag &= ~clearflags;
		lwp_exit();
	}

	/*
	 * If another thread is calling fork1(), stop the current thread so the
	 * other can complete.
	 */
	if (p->p_flag & SHOLDFORK1) {
		p->p_lwprcnt++;
		stop(PR_SUSPENDED, SUSPEND_NORMAL);
		if (p->p_flag & SEXITLWPS) {
			p->p_flag &= ~clearflags;
			lwp_exit();
		}
		return (-1);
	}

	/*
	 * If another thread is calling fork(), then indicate we are doing
	 * watchpoint activity.  This will cause holdlwps() above to stop the
	 * forking thread, at which point we can continue with watchpoint
	 * activity.
	 */
	if (p->p_flag & SHOLDFORK) {
		p->p_lwprcnt++;
		while (p->p_flag & SHOLDFORK) {
			p->p_flag |= SHOLDWATCH;
			cv_broadcast(&p->p_holdlwps);
			cv_wait(&p->p_holdlwps, &p->p_lock);
			p->p_flag &= ~SHOLDWATCH;
		}
		return (-1);
	}

	return (0);
}

/*
 * Stop all lwps within the process, holding themselves in the kernel while the
 * active lwp undergoes watchpoint activity.  This is more complicated than
 * expected because stop() relies on calling holdwatch() in order to copyin data
 * from the user's address space.  A double barrier is used to prevent an
 * infinite loop.
 *
 *	o The first thread into holdwatch() is the 'master' thread and does
 *	  the following:
 *
 *		- Sets SHOLDWATCH on the current process
 *		- Sets TP_WATCHSTOP on the current thread
 *		- Waits for all threads to be either stopped or have
 *		  TP_WATCHSTOP set.
 *		- Sets the SWATCHOK flag on the process
 *		- Unsets TP_WATCHSTOP
 *		- Waits for the other threads to completely stop
 *		- Unsets SWATCHOK
 *
 *	o If SHOLDWATCH is already set when we enter this function, then another
 *	  thread is already trying to stop this thread.  This 'slave' thread
 *	  does the following:
 *
 *		- Sets TP_WATCHSTOP on the current thread
 *		- Waits for SWATCHOK flag to be set
 *		- Calls stop()
 *
 *	o If SWATCHOK is set on the process, then this function immediately
 *	  returns, as we must have been called via stop().
 *
 * In addition, there are other flags that take precedence over SHOLDWATCH:
 *
 *	o If SEXITLWPS is set, exit immediately.
 *
 *	o If SHOLDFORK1 is set, wait for fork1() to complete.
 *
 *	o If SHOLDFORK is set, then watchpoint activity takes precedence.  In
 *	  this case, set SHOLDWATCH, signalling the forking thread to stop
 *	  first.
 *
 *	o If the process is being stopped via /proc (TP_PRSTOP is set), then we
 *	  stop the current thread.
 *
 * Returns 0 if all threads have been quiesced.  Returns non-zero if not all
 * threads were stopped, or the list of watched pages has changed.
 */
int
holdwatch(void)
{
	proc_t *p = curproc;
	kthread_t *t = curthread;
	int ret = 0;

	mutex_enter(&p->p_lock);

	p->p_lwprcnt--;

	/*
	 * Check for bail-out conditions as outlined above.
	 */
	if (holdcheck(0) != 0) {
		mutex_exit(&p->p_lock);
		return (-1);
	}

	if (!(p->p_flag & SHOLDWATCH)) {
		/*
		 * We are the master watchpoint thread.  Set SHOLDWATCH and poke
		 * the other threads.
		 */
		p->p_flag |= SHOLDWATCH;
		pokelwps(p);

		/*
		 * Wait for all threads to be stopped or have TP_WATCHSTOP set.
		 */
		while (pr_allstopped(p, 1) > 0) {
			if (holdcheck(SHOLDWATCH) != 0) {
				p->p_flag &= ~SHOLDWATCH;
				mutex_exit(&p->p_lock);
				return (-1);
			}

			cv_wait(&p->p_holdlwps, &p->p_lock);
		}

		/*
		 * All threads are now stopped or in the process of stopping.
		 * Set SWATCHOK and let them stop completely.
		 */
		p->p_flag |= SWATCHOK;
		t->t_proc_flag &= ~TP_WATCHSTOP;
		cv_broadcast(&p->p_holdlwps);

		while (pr_allstopped(p, 0) > 0) {
			/*
			 * At first glance, it may appear that we don't need a
			 * call to holdcheck() here.  But if the process gets a
			 * SIGKILL signal, one of our stopped threads may have
			 * been awakened and is waiting in exitlwps(), which
			 * takes precedence over watchpoints.
			 */
			if (holdcheck(SHOLDWATCH | SWATCHOK) != 0) {
				p->p_flag &= ~(SHOLDWATCH | SWATCHOK);
				mutex_exit(&p->p_lock);
				return (-1);
			}

			cv_wait(&p->p_holdlwps, &p->p_lock);
		}

		/*
		 * All threads are now completely stopped.
		 */
		p->p_flag &= ~SWATCHOK;
		p->p_flag &= ~SHOLDWATCH;
		p->p_lwprcnt++;

	} else if (!(p->p_flag & SWATCHOK)) {

		/*
		 * SHOLDWATCH is set, so another thread is trying to do
		 * watchpoint activity.  Indicate this thread is stopping, and
		 * wait for the OK from the master thread.
		 */
		t->t_proc_flag |= TP_WATCHSTOP;
		cv_broadcast(&p->p_holdlwps);

		while (!(p->p_flag & SWATCHOK)) {
			if (holdcheck(0) != 0) {
				t->t_proc_flag &= ~TP_WATCHSTOP;
				mutex_exit(&p->p_lock);
				return (-1);
			}

			cv_wait(&p->p_holdlwps, &p->p_lock);
		}

		/*
		 * Once the master thread has given the OK, this thread can
		 * actually call stop().
		 */
		t->t_proc_flag &= ~TP_WATCHSTOP;
		p->p_lwprcnt++;

		stop(PR_SUSPENDED, SUSPEND_NORMAL);

		/*
		 * It's not OK to do watchpoint activity, notify caller to
		 * retry.
		 */
		ret = -1;

	} else {

		/*
		 * The only way we can hit the case where SHOLDWATCH is set and
		 * SWATCHOK is set is if we are triggering this from within a
		 * stop() call.  Assert that this is the case.
		 */

		ASSERT(t->t_proc_flag & TP_STOPPING);
		p->p_lwprcnt++;
	}

	mutex_exit(&p->p_lock);

	return (ret);
}

/*
 * force all interruptible lwps to trap into the kernel.
 */
void
pokelwps(proc_t *p)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = p->p_tlist;
	do {
		if (t == curthread)
			continue;
		thread_lock(t);
		aston(t);	/* make thread trap or do post_syscall */
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			setrun_locked(t);
		} else if (t->t_state == TS_STOPPED) {
			/*
			 * Ensure that proc_exit() is not blocked by lwps
			 * that were stopped via jobcontrol or /proc.
			 */
			if (p->p_flag & SEXITLWPS) {
				p->p_stopsig = 0;
				t->t_schedflag |= (TS_XSTART | TS_PSTART);
				setrun_locked(t);
			}
			/*
			 * If we are holding lwps for a forkall(),
			 * force lwps that have been suspended via
			 * lwp_suspend() and are suspended inside
			 * of a system call to proceed to their
			 * holdlwp() points where they are clonable.
			 */
			if ((p->p_flag & SHOLDFORK) && SUSPENDED(t)) {
				if ((t->t_schedflag & TS_CSTART) == 0) {
					p->p_lwprcnt++;
					t->t_schedflag |= TS_CSTART;
					setrun_locked(t);
				}
			}
		} else if (t->t_state == TS_ONPROC) {
			if (t->t_cpu != CPU)
				poke_cpu(t->t_cpu->cpu_id);
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);
}

/*
 * undo the effects of holdlwps() or holdwatch().
 */
void
continuelwps(proc_t *p)
{
	kthread_t *t;

	/*
	 * If this flag is set, then the original holdwatch() didn't actually
	 * stop the process.  See comments for holdwatch().
	 */
	if (p->p_flag & SWATCHOK) {
		ASSERT(curthread->t_proc_flag & TP_STOPPING);
		return;
	}

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT((p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) == 0);

	t = p->p_tlist;
	do {
		thread_lock(t);		/* SUSPENDED looks at t_schedflag */
		if (SUSPENDED(t) && !(t->t_proc_flag & TP_HOLDLWP)) {
			p->p_lwprcnt++;
			t->t_schedflag |= TS_CSTART;
			setrun_locked(t);
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);
}

/*
 * Force all LWPs in the current process other than the caller to exit,
 * and then cv_wait() on p_holdlwps for them to exit.  The exitlwps() function
 * is typically used in these situations:
 *
 *   (a) prior to an exec() system call
 *   (b) prior to dumping a core file
 *   (c) prior to a uadmin() shutdown
 *
 * If the 'coredump' flag is set, other LWPs are quiesced but not destroyed.
 * Multiple threads in the process can call this function at one time by
 * triggering execs or core dumps simultaneously, so the SEXITLWPS bit is used
 * to declare one particular thread the winner who gets to kill the others.
 * If a thread wins the exitlwps() dance, zero is returned; otherwise an
 * appropriate errno value is returned to caller for its system call to return.
 */
int
exitlwps(int coredump)
{
	proc_t *p = curproc;
	int heldcnt;

	if (curthread->t_door)
		door_slam();
	if (p->p_door_list)
		door_revoke_all();
	if (curthread->t_schedctl != NULL)
		schedctl_lwp_cleanup(curthread);

	/*
	 * Ensure that before starting to wait for other lwps to exit,
	 * cleanup all upimutexes held by curthread. Otherwise, some other
	 * lwp could be waiting (uninterruptibly) for a upimutex held by
	 * curthread, and the call to pokelwps() below would deadlock.
	 * Even if a blocked upimutex_lock is made interruptible,
	 * curthread's upimutexes need to be unlocked: do it here.
	 */
	if (curthread->t_upimutex != NULL)
		upimutex_cleanup();

	/*
	 * Grab p_lock in order to check and set SEXITLWPS to declare a winner.
	 * We must also block any further /proc access from this point forward.
	 */
	mutex_enter(&p->p_lock);
	prbarrier(p);

	if (p->p_flag & SEXITLWPS) {
		mutex_exit(&p->p_lock);
		aston(curthread);	/* force a trip through post_syscall */
		return (set_errno(EINTR));
	}

	p->p_flag |= SEXITLWPS;
	if (coredump)		/* tell other lwps to stop, not exit */
		p->p_flag |= SCOREDUMP;

	/*
	 * Give precedence to exitlwps() if a holdlwps() is
	 * in progress. The lwp doing the holdlwps() operation
	 * is aborted when it is awakened.
	 */
	while (p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) {
		cv_broadcast(&p->p_holdlwps);
		cv_wait(&p->p_holdlwps, &p->p_lock);
		prbarrier(p);
	}
	p->p_flag |= SHOLDFORK;
	pokelwps(p);

	/*
	 * Wait for process to become quiescent.
	 */
	--p->p_lwprcnt;
	while (p->p_lwprcnt > 0) {
		cv_wait(&p->p_holdlwps, &p->p_lock);
		prbarrier(p);
	}
	p->p_lwprcnt++;
	ASSERT(p->p_lwprcnt == 1);

	/*
	 * The SCOREDUMP flag puts the process into a quiescent
	 * state. The process's lwps remain attached to this
	 * process until exitlwps() is called again without the
	 * 'coredump' flag set, then the lwps are terminated
	 * and the process can exit.
	 */
	if (coredump) {
		p->p_flag &= ~(SCOREDUMP | SHOLDFORK | SEXITLWPS);
		goto out;
	}

	/*
	 * Determine if there are any lwps left dangling in
	 * the stopped state. This happens when exitlwps()
	 * aborts a holdlwps() operation.
	 */
	p->p_flag &= ~SHOLDFORK;
	if ((heldcnt = p->p_lwpcnt) > 1) {
		kthread_t *t;
		for (t = curthread->t_forw; --heldcnt > 0; t = t->t_forw) {
			t->t_proc_flag &= ~TP_TWAIT;
			lwp_continue(t);
		}
	}

	/*
	 * Wait for all other lwps to exit.
	 */
	--p->p_lwprcnt;
	while (p->p_lwpcnt > 1) {
		cv_wait(&p->p_holdlwps, &p->p_lock);
		prbarrier(p);
	}
	++p->p_lwprcnt;
	ASSERT(p->p_lwpcnt == 1 && p->p_lwprcnt == 1);

	p->p_flag &= ~SEXITLWPS;
	curthread->t_proc_flag &= ~TP_TWAIT;

out:
	if (!coredump && p->p_zombcnt) {	/* cleanup the zombie lwps */
		lwpdir_t *ldp;
		lwpent_t *lep;
		int i;

		for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
			lep = ldp->ld_entry;
			if (lep != NULL && lep->le_thread != curthread) {
				ASSERT(lep->le_thread == NULL);
				p->p_zombcnt--;
				lwp_hash_out(p, lep->le_lwpid);
			}
		}
		ASSERT(p->p_zombcnt == 0);
	}

	/*
	 * If some other LWP in the process wanted us to suspend ourself,
	 * then we will not do it. The other LWP is now terminated and
	 * no one will ever continue us again if we suspend ourself.
	 */
	curthread->t_proc_flag &= ~TP_HOLDLWP;
	p->p_flag &= ~(SHOLDFORK | SHOLDFORK1 | SHOLDWATCH | SLWPWRAP);
	mutex_exit(&p->p_lock);
	return (0);
}

/*
 * duplicate a lwp.
 */
klwp_t *
forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
{
	klwp_t *clwp;
	void *tregs, *tfpu;
	kthread_t *t = lwptot(lwp);
	kthread_t *ct;
	proc_t *p = lwptoproc(lwp);
	int cid;
	void *bufp;
	void *brand_data;
	int val;

	ASSERT(p == curproc);
	ASSERT(t == curthread || (SUSPENDED(t) && lwp->lwp_asleep == 0));

#if defined(__sparc)
	if (t == curthread)
		(void) flush_user_windows_to_stack(NULL);
#endif

	if (t == curthread)
		/* copy args out of registers first */
		(void) save_syscall_args();

	clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt,
	    NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid);
	if (clwp == NULL)
		return (NULL);

	/*
	 * most of the parent's lwp can be copied to its duplicate,
	 * except for the fields that are unique to each lwp, like
	 * lwp_thread, lwp_procp, lwp_regs, and lwp_ap.
	 */
	ct = clwp->lwp_thread;
	tregs = clwp->lwp_regs;
	tfpu = clwp->lwp_fpu;
	brand_data = clwp->lwp_brand;

	/*
	 * Copy parent lwp to child lwp.  Hold child's p_lock to prevent
	 * mstate_aggr_state() from reading stale mstate entries copied
	 * from lwp to clwp.
	 */
	mutex_enter(&cp->p_lock);
	*clwp = *lwp;

	/* clear microstate and resource usage data in new lwp */
	init_mstate(ct, LMS_STOPPED);
	bzero(&clwp->lwp_ru, sizeof (clwp->lwp_ru));
	mutex_exit(&cp->p_lock);

	/* fix up child's lwp */

	clwp->lwp_pcb.pcb_flags = 0;
#if defined(__sparc)
	clwp->lwp_pcb.pcb_step = STEP_NONE;
#endif
	clwp->lwp_cursig = 0;
	clwp->lwp_extsig = 0;
	clwp->lwp_curinfo = (struct sigqueue *)0;
	clwp->lwp_thread = ct;
	ct->t_sysnum = t->t_sysnum;
	clwp->lwp_regs = tregs;
	clwp->lwp_fpu = tfpu;
	clwp->lwp_brand = brand_data;
	clwp->lwp_ap = clwp->lwp_arg;
	clwp->lwp_procp = cp;
	bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer));
	clwp->lwp_lastfault = 0;
	clwp->lwp_lastfaddr = 0;

	/* copy parent's struct regs to child. */
	lwp_forkregs(lwp, clwp);

	/*
	 * Fork thread context ops, if any.
	 */
	if (t->t_ctx)
		forkctx(t, ct);

	/* fix door state in the child */
	if (t->t_door)
		door_fork(t, ct);

	/* copy current contract templates, clear latest contracts */
	lwp_ctmpl_copy(clwp, lwp);

	mutex_enter(&cp->p_lock);
	/* lwp_create() set the TP_HOLDLWP flag */
	if (!(t->t_proc_flag & TP_HOLDLWP))
		ct->t_proc_flag &= ~TP_HOLDLWP;
	if (cp->p_flag & SMSACCT)
		ct->t_proc_flag |= TP_MSACCT;
	mutex_exit(&cp->p_lock);

	/* Allow brand to propagate brand-specific state */
	if (PROC_IS_BRANDED(p))
		BROP(p)->b_forklwp(lwp, clwp);

retry:
	cid = t->t_cid;

	val = CL_ALLOC(&bufp, cid, KM_SLEEP);
	ASSERT(val == 0);

	mutex_enter(&p->p_lock);
	if (cid != t->t_cid) {
		/*
		 * Someone just changed this thread's scheduling class,
		 * so try pre-allocating the buffer again.  Hopefully we
		 * don't hit this often.
		 */
		mutex_exit(&p->p_lock);
		CL_FREE(cid, bufp);
		goto retry;
	}

	ct->t_unpark = t->t_unpark;
	ct->t_clfuncs = t->t_clfuncs;
	CL_FORK(t, ct, bufp);
	ct->t_cid = t->t_cid;	/* after data allocated so prgetpsinfo works */
	mutex_exit(&p->p_lock);

	return (clwp);
}

/*
 * Add a new lwp entry to the lwp directory and to the lwpid hash table.
 */
void
lwp_hash_in(proc_t *p, lwpent_t *lep, tidhash_t *tidhash, uint_t tidhash_sz,
    int do_lock)
{
	tidhash_t *thp = &tidhash[TIDHASH(lep->le_lwpid, tidhash_sz)];
	lwpdir_t **ldpp;
	lwpdir_t *ldp;
	kthread_t *t;

	/*
	 * Allocate a directory element from the free list.
	 * Code elsewhere guarantees a free slot.
	 */
	ldp = p->p_lwpfree;
	p->p_lwpfree = ldp->ld_next;
	ASSERT(ldp->ld_entry == NULL);
	ldp->ld_entry = lep;

	if (do_lock)
		mutex_enter(&thp->th_lock);

	/*
	 * Insert it into the lwpid hash table.
	 */
	ldpp = &thp->th_list;
	ldp->ld_next = *ldpp;
	*ldpp = ldp;

	/*
	 * Set the active thread's directory slot entry.
	 */
	if ((t = lep->le_thread) != NULL) {
		ASSERT(lep->le_lwpid == t->t_tid);
		t->t_dslot = (int)(ldp - p->p_lwpdir);
	}

	if (do_lock)
		mutex_exit(&thp->th_lock);
}

/*
 * Remove an lwp from the lwpid hash table and free its directory entry.
 * This is done when a detached lwp exits in lwp_exit() or
 * when a non-detached lwp is waited for in lwp_wait() or
 * when a zombie lwp is detached in lwp_detach().
 */
void
lwp_hash_out(proc_t *p, id_t lwpid)
{
	tidhash_t *thp = &p->p_tidhash[TIDHASH(lwpid, p->p_tidhash_sz)];
	lwpdir_t **ldpp;
	lwpdir_t *ldp;
	lwpent_t *lep;

	mutex_enter(&thp->th_lock);
	for (ldpp = &thp->th_list;
	    (ldp = *ldpp) != NULL; ldpp = &ldp->ld_next) {
		lep = ldp->ld_entry;
		if (lep->le_lwpid == lwpid) {
			prlwpfree(p, lep);	/* /proc deals with le_trace */
			*ldpp = ldp->ld_next;
			ldp->ld_entry = NULL;
			ldp->ld_next = p->p_lwpfree;
			p->p_lwpfree = ldp;
			kmem_free(lep, sizeof (*lep));
			break;
		}
	}
	mutex_exit(&thp->th_lock);
}

/*
 * Lookup an lwp in the lwpid hash table by lwpid.
 */
lwpdir_t *
lwp_hash_lookup(proc_t *p, id_t lwpid)
{
	tidhash_t *thp;
	lwpdir_t *ldp;

	/*
	 * The process may be exiting, after p_tidhash has been set to NULL in
	 * proc_exit() but before prfree() has been called.  Return failure in
	 * this case.
	 */
	if (p->p_tidhash == NULL)
		return (NULL);

	thp = &p->p_tidhash[TIDHASH(lwpid, p->p_tidhash_sz)];
	for (ldp = thp->th_list; ldp != NULL; ldp = ldp->ld_next) {
		if (ldp->ld_entry->le_lwpid == lwpid)
			return (ldp);
	}

	return (NULL);
}

/*
 * Same as lwp_hash_lookup(), but acquire and return
 * the tid hash table entry lock on success.
 */
lwpdir_t *
lwp_hash_lookup_and_lock(proc_t *p, id_t lwpid, kmutex_t **mpp)
{
	tidhash_t *tidhash;
	uint_t tidhash_sz;
	tidhash_t *thp;
	lwpdir_t *ldp;

top:
	tidhash_sz = p->p_tidhash_sz;
	membar_consumer();
	if ((tidhash = p->p_tidhash) == NULL)
		return (NULL);

	thp = &tidhash[TIDHASH(lwpid, tidhash_sz)];
	mutex_enter(&thp->th_lock);

	/*
	 * Since we are not holding p->p_lock, the tid hash table
	 * may have changed.  If so, start over.  If not, then
	 * it cannot change until after we drop &thp->th_lock;
	 */
	if (tidhash != p->p_tidhash || tidhash_sz != p->p_tidhash_sz) {
		mutex_exit(&thp->th_lock);
		goto top;
	}

	for (ldp = thp->th_list; ldp != NULL; ldp = ldp->ld_next) {
		if (ldp->ld_entry->le_lwpid == lwpid) {
			*mpp = &thp->th_lock;
			return (ldp);
		}
	}

	mutex_exit(&thp->th_lock);
	return (NULL);
}

/*
 * Update the indicated LWP usage statistic for the current LWP.
 */
void
lwp_stat_update(lwp_stat_id_t lwp_stat_id, long inc)
{
	klwp_t *lwp = ttolwp(curthread);

	if (lwp == NULL)
		return;

	switch (lwp_stat_id) {
	case LWP_STAT_INBLK:
		lwp->lwp_ru.inblock += inc;
		break;
	case LWP_STAT_OUBLK:
		lwp->lwp_ru.oublock += inc;
		break;
	case LWP_STAT_MSGRCV:
		lwp->lwp_ru.msgrcv += inc;
		break;
	case LWP_STAT_MSGSND:
		lwp->lwp_ru.msgsnd += inc;
		break;
	default:
		panic("lwp_stat_update: invalid lwp_stat_id 0x%x", lwp_stat_id);
	}
}