1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/exacct.h> 31 #include <sys/id_space.h> 32 #include <sys/kmem.h> 33 #include <sys/modhash.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/project.h> 37 #include <sys/rctl.h> 38 #include <sys/systm.h> 39 #include <sys/task.h> 40 #include <sys/time.h> 41 #include <sys/types.h> 42 #include <sys/zone.h> 43 #include <sys/cpuvar.h> 44 #include <sys/fss.h> 45 #include <sys/class.h> 46 #include <sys/project.h> 47 48 /* 49 * Tasks 50 * 51 * A task is a collection of processes, associated with a common project ID 52 * and related by a common initial parent. The task primarily represents a 53 * natural process sequence with known resource usage, although it can also be 54 * viewed as a convenient grouping of processes for signal delivery, processor 55 * binding, and administrative operations. 
*
 * Membership and observership
 *   We can conceive of situations where processes outside of the task may wish
 *   to examine the resource usage of the task.  Similarly, a number of the
 *   administrative operations on a task can be performed by processes that are
 *   not members of the task.  Accordingly, we must design a locking strategy
 *   where observers of the task, who wish to examine or operate on the task,
 *   and members of the task, who can perform the mentioned operations, as well
 *   as leave the task, see a consistent and correct representation of the task
 *   at all times.
 *
 * Locking
 *   Because the task membership is a new relation between processes, its
 *   locking becomes an additional responsibility of the pidlock/p_lock locking
 *   sequence; however, tasks closely resemble sessions and the session locking
 *   model is mostly appropriate for the interaction of tasks, processes, and
 *   procfs.
 *
 *   kmutex_t task_hash_lock
 *     task_hash_lock is a global lock protecting the contents of the task
 *     ID-to-task pointer hash.  Holders of task_hash_lock must not attempt to
 *     acquire pidlock or p_lock.
 *   uint_t tk_hold_count
 *     tk_hold_count, the number of members and observers of the current task,
 *     must be manipulated atomically.
 *   proc_t *tk_memb_list
 *   proc_t *p_tasknext
 *   proc_t *p_taskprev
 *     The task's membership list is protected by pidlock, and pidlock is
 *     therefore always acquired before any of the members' p_lock mutexes.
 *     The p_task member of the proc structure is protected by pidlock or
 *     p_lock for reading, and by both pidlock and p_lock for modification, as
 *     is done for p_sessp.  The key point is that only the process itself can
 *     modify its p_task, and not any other entity on the system.  (/proc will
 *     use prlock() to prevent the process from leaving, as opposed to pidlock.)
91 * kmutex_t tk_usage_lock 92 * tk_usage_lock is a per-task lock protecting the contents of the task 93 * usage structure and tk_nlwps counter for the task.max-lwps resource 94 * control. 95 */ 96 97 int task_hash_size = 256; 98 static kmutex_t task_hash_lock; 99 static mod_hash_t *task_hash; 100 101 static id_space_t *taskid_space; /* global taskid space */ 102 static kmem_cache_t *task_cache; /* kmem cache for task structures */ 103 104 rctl_hndl_t rc_task_lwps; 105 rctl_hndl_t rc_task_cpu_time; 106 107 /* 108 * static rctl_qty_t task_usage_lwps(void *taskp) 109 * 110 * Overview 111 * task_usage_lwps() is the usage operation for the resource control 112 * associated with the number of LWPs in a task. 113 * 114 * Return values 115 * The number of LWPs in the given task is returned. 116 * 117 * Caller's context 118 * The p->p_lock must be held across the call. 119 */ 120 /*ARGSUSED*/ 121 static rctl_qty_t 122 task_lwps_usage(rctl_t *r, proc_t *p) 123 { 124 task_t *t; 125 rctl_qty_t nlwps; 126 127 ASSERT(MUTEX_HELD(&p->p_lock)); 128 129 t = p->p_task; 130 mutex_enter(&p->p_zone->zone_nlwps_lock); 131 nlwps = t->tk_nlwps; 132 mutex_exit(&p->p_zone->zone_nlwps_lock); 133 134 return (nlwps); 135 } 136 137 /* 138 * static int task_test_lwps(void *taskp, rctl_val_t *, int64_t incr, 139 * int flags) 140 * 141 * Overview 142 * task_test_lwps() is the test-if-valid-increment for the resource control 143 * for the number of processes in a task. 144 * 145 * Return values 146 * 0 if the threshold limit was not passed, 1 if the limit was passed. 147 * 148 * Caller's context 149 * p->p_lock must be held across the call. 
150 */ 151 /*ARGSUSED*/ 152 static int 153 task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, 154 rctl_qty_t incr, 155 uint_t flags) 156 { 157 rctl_qty_t nlwps; 158 159 ASSERT(MUTEX_HELD(&p->p_lock)); 160 ASSERT(e->rcep_t == RCENTITY_TASK); 161 if (e->rcep_p.task == NULL) 162 return (0); 163 164 ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock))); 165 nlwps = e->rcep_p.task->tk_nlwps; 166 167 if (nlwps + incr > rcntl->rcv_value) 168 return (1); 169 170 return (0); 171 } 172 /*ARGSUSED*/ 173 static int 174 task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) { 175 176 ASSERT(MUTEX_HELD(&p->p_lock)); 177 ASSERT(e->rcep_t == RCENTITY_TASK); 178 if (e->rcep_p.task == NULL) 179 return (0); 180 181 e->rcep_p.task->tk_nlwps_ctl = nv; 182 return (0); 183 } 184 185 /* 186 * static rctl_qty_t task_usage_cpu_secs(void *taskp) 187 * 188 * Overview 189 * task_usage_cpu_secs() is the usage operation for the resource control 190 * associated with the total accrued CPU seconds for a task. 191 * 192 * Return values 193 * The number of CPU seconds consumed by the task is returned. 194 * 195 * Caller's context 196 * The given task must be held across the call. 197 */ 198 /*ARGSUSED*/ 199 static rctl_qty_t 200 task_cpu_time_usage(rctl_t *r, proc_t *p) 201 { 202 task_t *t = p->p_task; 203 204 ASSERT(MUTEX_HELD(&p->p_lock)); 205 return (t->tk_cpu_time / hz); 206 } 207 208 /* 209 * static int task_test_cpu_secs(void *taskp, rctl_val_t *, int64_t incr, 210 * int flags) 211 * 212 * Overview 213 * task_test_cpu_secs() is the test-if-valid-increment for the resource 214 * control for the total accrued CPU seconds for a task. 215 * 216 * Return values 217 * 0 if the threshold limit was not passed, 1 if the limit was passed. 218 * 219 * Caller's context 220 * The given task must be held across the call. 
221 */ 222 /*ARGSUSED*/ 223 static int 224 task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, 225 struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags) 226 { 227 task_t *t; 228 229 ASSERT(MUTEX_HELD(&p->p_lock)); 230 ASSERT(e->rcep_t == RCENTITY_TASK); 231 if (e->rcep_p.task == NULL) 232 return (0); 233 234 t = e->rcep_p.task; 235 if ((t->tk_cpu_time + incr) / hz >= rcntl->rcv_value) 236 return (1); 237 238 return (0); 239 } 240 241 static task_t * 242 task_find(taskid_t id, zoneid_t zoneid) 243 { 244 task_t *tk; 245 246 ASSERT(MUTEX_HELD(&task_hash_lock)); 247 248 if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id, 249 (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND || 250 (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id)) 251 return (NULL); 252 253 return (tk); 254 } 255 256 /* 257 * task_hold_by_id(), task_hold_by_id_zone() 258 * 259 * Overview 260 * task_hold_by_id() is used to take a reference on a task by its task id, 261 * supporting the various system call interfaces for obtaining resource data, 262 * delivering signals, and so forth. 263 * 264 * Return values 265 * Returns a pointer to the task_t with taskid_t id. The task is returned 266 * with its hold count incremented by one. Returns NULL if there 267 * is no task with the requested id. 268 * 269 * Caller's context 270 * Caller must not be holding task_hash_lock. No restrictions on context. 
271 */ 272 task_t * 273 task_hold_by_id_zone(taskid_t id, zoneid_t zoneid) 274 { 275 task_t *tk; 276 277 mutex_enter(&task_hash_lock); 278 if ((tk = task_find(id, zoneid)) != NULL) 279 atomic_add_32(&tk->tk_hold_count, 1); 280 mutex_exit(&task_hash_lock); 281 282 return (tk); 283 } 284 285 task_t * 286 task_hold_by_id(taskid_t id) 287 { 288 zoneid_t zoneid; 289 290 if (INGLOBALZONE(curproc)) 291 zoneid = ALL_ZONES; 292 else 293 zoneid = getzoneid(); 294 return (task_hold_by_id_zone(id, zoneid)); 295 } 296 297 /* 298 * void task_hold(task_t *) 299 * 300 * Overview 301 * task_hold() is used to take an additional reference to the given task. 302 * 303 * Return values 304 * None. 305 * 306 * Caller's context 307 * No restriction on context. 308 */ 309 void 310 task_hold(task_t *tk) 311 { 312 atomic_add_32(&tk->tk_hold_count, 1); 313 } 314 315 /* 316 * void task_rele(task_t *) 317 * 318 * Overview 319 * task_rele() relinquishes a reference on the given task, which was acquired 320 * via task_hold() or task_hold_by_id(). If this is the last member or 321 * observer of the task, dispatch it for commitment via the accounting 322 * subsystem. 323 * 324 * Return values 325 * None. 326 * 327 * Caller's context 328 * Caller must not be holding the task_hash_lock. 329 * Caller's context must be acceptable for KM_SLEEP allocations. 330 */ 331 void 332 task_rele(task_t *tk) 333 { 334 mutex_enter(&task_hash_lock); 335 if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) { 336 mutex_exit(&task_hash_lock); 337 return; 338 } 339 340 mutex_enter(&tk->tk_zone->zone_nlwps_lock); 341 tk->tk_proj->kpj_ntasks--; 342 mutex_exit(&tk->tk_zone->zone_nlwps_lock); 343 344 if (mod_hash_destroy(task_hash, 345 (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0) 346 panic("unable to delete task %d", tk->tk_tkid); 347 mutex_exit(&task_hash_lock); 348 349 /* 350 * At this point, there are no members or observers of the task, so we 351 * can safely send it on for commitment to the accounting subsystem. 
352 * The task will be destroyed in task_end() subsequent to commitment. 353 */ 354 (void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP); 355 } 356 357 /* 358 * task_t *task_create(projid_t, zone *) 359 * 360 * Overview 361 * A process constructing a new task calls task_create() to construct and 362 * preinitialize the task for the appropriate destination project. Only one 363 * task, the primordial task0, is not created with task_create(). 364 * 365 * Return values 366 * None. 367 * 368 * Caller's context 369 * Caller's context should be safe for KM_SLEEP allocations. 370 * The caller should appropriately bump the kpj_ntasks counter on the 371 * project that contains this task. 372 */ 373 task_t * 374 task_create(projid_t projid, zone_t *zone) 375 { 376 task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP); 377 task_t *ancestor_tk; 378 taskid_t tkid; 379 task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 380 mod_hash_hndl_t hndl; 381 rctl_set_t *set = rctl_set_create(); 382 rctl_alloc_gp_t *gp; 383 rctl_entity_p_t e; 384 385 bzero(tk, sizeof (task_t)); 386 387 tk->tk_tkid = tkid = id_alloc(taskid_space); 388 tk->tk_nlwps = 0; 389 tk->tk_nlwps_ctl = INT_MAX; 390 tk->tk_usage = tu; 391 tk->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 392 tk->tk_proj = project_hold_by_id(projid, zone, PROJECT_HOLD_INSERT); 393 tk->tk_flags = TASK_NORMAL; 394 395 /* 396 * Copy ancestor task's resource controls. 
397 */ 398 zone_task_hold(zone); 399 mutex_enter(&curproc->p_lock); 400 ancestor_tk = curproc->p_task; 401 task_hold(ancestor_tk); 402 tk->tk_zone = zone; 403 mutex_exit(&curproc->p_lock); 404 405 for (;;) { 406 gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls); 407 408 mutex_enter(&ancestor_tk->tk_rctls->rcs_lock); 409 if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp)) 410 break; 411 412 mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 413 414 rctl_prealloc_destroy(gp); 415 } 416 417 /* 418 * At this point, curproc does not have the appropriate linkage 419 * through the task to the project. So, rctl_set_dup should only 420 * copy the rctls, and leave the callbacks for later. 421 */ 422 e.rcep_p.task = tk; 423 e.rcep_t = RCENTITY_TASK; 424 tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e, 425 set, gp, RCD_DUP); 426 mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 427 428 rctl_prealloc_destroy(gp); 429 430 /* 431 * Record the ancestor task's ID for use by extended accounting. 432 */ 433 tu->tu_anctaskid = ancestor_tk->tk_tkid; 434 task_rele(ancestor_tk); 435 436 /* 437 * Put new task structure in the hash table. 438 */ 439 (void) mod_hash_reserve(task_hash, &hndl); 440 mutex_enter(&task_hash_lock); 441 ASSERT(task_find(tkid, getzoneid()) == NULL); 442 if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid, 443 (mod_hash_val_t *)tk, hndl) != 0) { 444 mod_hash_cancel(task_hash, &hndl); 445 panic("unable to insert task %d(%p)", tkid, (void *)tk); 446 } 447 mutex_exit(&task_hash_lock); 448 449 return (tk); 450 } 451 452 /* 453 * void task_attach(task_t *, proc_t *) 454 * 455 * Overview 456 * task_attach() is used to attach a process to a task; this operation is only 457 * performed as a result of a fork() or settaskid() system call. The proc_t's 458 * p_tasknext and p_taskprev fields will be set such that the proc_t is a 459 * member of the doubly-linked list of proc_t's that make up the task. 460 * 461 * Return values 462 * None. 
463 * 464 * Caller's context 465 * pidlock and p->p_lock must be held on entry. 466 */ 467 void 468 task_attach(task_t *tk, proc_t *p) 469 { 470 proc_t *first, *prev; 471 rctl_entity_p_t e; 472 ASSERT(tk != NULL); 473 ASSERT(p != NULL); 474 ASSERT(MUTEX_HELD(&pidlock)); 475 ASSERT(MUTEX_HELD(&p->p_lock)); 476 477 if (tk->tk_memb_list == NULL) { 478 p->p_tasknext = p; 479 p->p_taskprev = p; 480 } else { 481 first = tk->tk_memb_list; 482 prev = first->p_taskprev; 483 first->p_taskprev = p; 484 p->p_tasknext = first; 485 p->p_taskprev = prev; 486 prev->p_tasknext = p; 487 } 488 tk->tk_memb_list = p; 489 task_hold(tk); 490 p->p_task = tk; 491 492 /* 493 * Now that the linkage from process to task and project is 494 * complete, do the required callbacks for the task and project 495 * rctl sets. 496 */ 497 e.rcep_p.proj = tk->tk_proj; 498 e.rcep_t = RCENTITY_PROJECT; 499 (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL, 500 RCD_CALLBACK); 501 502 e.rcep_p.task = tk; 503 e.rcep_t = RCENTITY_TASK; 504 (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL, 505 RCD_CALLBACK); 506 507 } 508 509 /* 510 * task_begin() 511 * 512 * Overview 513 * A process constructing a new task calls task_begin() to initialize the 514 * task, by attaching itself as a member. 515 * 516 * Return values 517 * None. 518 * 519 * Caller's context 520 * pidlock and p_lock must be held across the call to task_begin(). 521 */ 522 void 523 task_begin(task_t *tk, proc_t *p) 524 { 525 timestruc_t ts; 526 task_usage_t *tu; 527 528 ASSERT(MUTEX_HELD(&pidlock)); 529 ASSERT(MUTEX_HELD(&p->p_lock)); 530 531 mutex_enter(&tk->tk_usage_lock); 532 tu = tk->tk_usage; 533 gethrestime(&ts); 534 tu->tu_startsec = (uint64_t)ts.tv_sec; 535 tu->tu_startnsec = (uint64_t)ts.tv_nsec; 536 mutex_exit(&tk->tk_usage_lock); 537 538 /* 539 * Join process to the task as a member. 
540 */ 541 task_attach(tk, p); 542 } 543 544 /* 545 * void task_detach(proc_t *) 546 * 547 * Overview 548 * task_detach() removes the specified process from its task. task_detach 549 * sets the process's task membership to NULL, in anticipation of a final exit 550 * or of joining a new task. Because task_rele() requires a context safe for 551 * KM_SLEEP allocations, a task_detach() is followed by a subsequent 552 * task_rele() once appropriate context is available. 553 * 554 * Because task_detach() involves relinquishing the process's membership in 555 * the project, any observational rctls the process may have had on the task 556 * or project are destroyed. 557 * 558 * Return values 559 * None. 560 * 561 * Caller's context 562 * pidlock and p_lock held across task_detach(). 563 */ 564 void 565 task_detach(proc_t *p) 566 { 567 task_t *tk = p->p_task; 568 569 ASSERT(MUTEX_HELD(&pidlock)); 570 ASSERT(MUTEX_HELD(&p->p_lock)); 571 ASSERT(p->p_task != NULL); 572 ASSERT(tk->tk_memb_list != NULL); 573 574 if (tk->tk_memb_list == p) 575 tk->tk_memb_list = p->p_tasknext; 576 if (tk->tk_memb_list == p) 577 tk->tk_memb_list = NULL; 578 p->p_taskprev->p_tasknext = p->p_tasknext; 579 p->p_tasknext->p_taskprev = p->p_taskprev; 580 581 rctl_set_tearoff(p->p_task->tk_rctls, p); 582 rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p); 583 584 p->p_task = NULL; 585 p->p_tasknext = p->p_taskprev = NULL; 586 } 587 588 /* 589 * task_change(task_t *, proc_t *) 590 * 591 * Overview 592 * task_change() removes the specified process from its current task. The 593 * process is then attached to the specified task. This routine is called 594 * from settaskid() when process is being moved to a new task. 595 * 596 * Return values 597 * None. 
598 * 599 * Caller's context 600 * pidlock and p_lock held across task_change() 601 */ 602 void 603 task_change(task_t *newtk, proc_t *p) 604 { 605 task_t *oldtk = p->p_task; 606 607 ASSERT(MUTEX_HELD(&pidlock)); 608 ASSERT(MUTEX_HELD(&p->p_lock)); 609 ASSERT(oldtk != NULL); 610 ASSERT(oldtk->tk_memb_list != NULL); 611 612 mutex_enter(&p->p_zone->zone_nlwps_lock); 613 oldtk->tk_nlwps -= p->p_lwpcnt; 614 mutex_exit(&p->p_zone->zone_nlwps_lock); 615 616 mutex_enter(&newtk->tk_zone->zone_nlwps_lock); 617 newtk->tk_nlwps += p->p_lwpcnt; 618 mutex_exit(&newtk->tk_zone->zone_nlwps_lock); 619 620 task_detach(p); 621 task_begin(newtk, p); 622 exacct_move_mstate(p, oldtk, newtk); 623 } 624 625 /* 626 * task_end() 627 * 628 * Overview 629 * task_end() contains the actions executed once the final member of 630 * a task has released the task, and all actions connected with the task, such 631 * as committing an accounting record to a file, are completed. It is called 632 * by the known last consumer of the task information. Additionally, 633 * task_end() must never refer to any process in the system. 634 * 635 * Return values 636 * None. 637 * 638 * Caller's context 639 * No restrictions on context, beyond that given above. 
640 */ 641 void 642 task_end(task_t *tk) 643 { 644 ASSERT(tk->tk_hold_count == 0); 645 646 project_rele(tk->tk_proj); 647 kmem_free(tk->tk_usage, sizeof (task_usage_t)); 648 kmem_free(tk->tk_inherited, sizeof (task_usage_t)); 649 if (tk->tk_prevusage != NULL) 650 kmem_free(tk->tk_prevusage, sizeof (task_usage_t)); 651 if (tk->tk_zoneusage != NULL) 652 kmem_free(tk->tk_zoneusage, sizeof (task_usage_t)); 653 rctl_set_free(tk->tk_rctls); 654 id_free(taskid_space, tk->tk_tkid); 655 zone_task_rele(tk->tk_zone); 656 kmem_cache_free(task_cache, tk); 657 } 658 659 static void 660 changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf, 661 void *zonebuf) 662 { 663 kproject_t *oldkpj; 664 kthread_t *t; 665 666 ASSERT(MUTEX_HELD(&pidlock)); 667 ASSERT(MUTEX_HELD(&p->p_lock)); 668 669 if ((t = p->p_tlist) != NULL) { 670 do { 671 (void) project_hold(kpj); 672 673 thread_lock(t); 674 oldkpj = ttoproj(t); 675 676 /* 677 * Kick this thread so that he doesn't sit 678 * on a wrong wait queue. 679 */ 680 if (ISWAITING(t)) 681 setrun_locked(t); 682 683 /* 684 * The thread wants to go on the project wait queue, but 685 * the waitq is changing. 686 */ 687 if (t->t_schedflag & TS_PROJWAITQ) 688 t->t_schedflag &= ~ TS_PROJWAITQ; 689 690 t->t_proj = kpj; 691 t->t_pre_sys = 1; /* For cred update */ 692 thread_unlock(t); 693 fss_changeproj(t, kpj, zone, projbuf, zonebuf); 694 695 project_rele(oldkpj); 696 } while ((t = t->t_forw) != p->p_tlist); 697 } 698 } 699 700 /* 701 * task_join() 702 * 703 * Overview 704 * task_join() contains the actions that must be executed when the first 705 * member (curproc) of a newly created task joins it. It may never fail. 706 * 707 * The caller must make sure holdlwps() is called so that all other lwps are 708 * stopped prior to calling this function. 709 * 710 * NB: It returns with curproc->p_lock held. 711 * 712 * Return values 713 * Pointer to the old task. 714 * 715 * Caller's context 716 * cpu_lock must be held entering the function. 
It will acquire pidlock, 717 * p_crlock and p_lock during execution. 718 */ 719 task_t * 720 task_join(task_t *tk, uint_t flags) 721 { 722 proc_t *p = ttoproc(curthread); 723 task_t *prev_tk; 724 void *projbuf, *zonebuf; 725 zone_t *zone = tk->tk_zone; 726 projid_t projid = tk->tk_proj->kpj_id; 727 cred_t *oldcr; 728 729 /* 730 * We can't know for sure if holdlwps() was called, but we can check to 731 * ensure we're single-threaded. 732 */ 733 ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1); 734 735 /* 736 * Changing the credential is always hard because we cannot 737 * allocate memory when holding locks but we don't know whether 738 * we need to change it. We first get a reference to the current 739 * cred if we need to change it. Then we create a credential 740 * with an updated project id. Finally we install it, first 741 * releasing the reference we had on the p_cred at the time we 742 * acquired the lock the first time and later we release the 743 * reference to p_cred at the time we acquired the lock the 744 * second time. 745 */ 746 mutex_enter(&p->p_crlock); 747 if (crgetprojid(p->p_cred) == projid) 748 oldcr = NULL; 749 else 750 crhold(oldcr = p->p_cred); 751 mutex_exit(&p->p_crlock); 752 753 if (oldcr != NULL) { 754 cred_t *newcr = crdup(oldcr); 755 crsetprojid(newcr, projid); 756 crfree(oldcr); 757 758 mutex_enter(&p->p_crlock); 759 oldcr = p->p_cred; 760 p->p_cred = newcr; 761 mutex_exit(&p->p_crlock); 762 crfree(oldcr); 763 } 764 765 /* 766 * Make sure that the number of processor sets is constant 767 * across this operation. 768 */ 769 ASSERT(MUTEX_HELD(&cpu_lock)); 770 771 projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ); 772 zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE); 773 774 mutex_enter(&pidlock); 775 mutex_enter(&p->p_lock); 776 777 prev_tk = p->p_task; 778 task_change(tk, p); 779 780 /* 781 * Now move threads one by one to their new project. 
782 */ 783 changeproj(p, tk->tk_proj, zone, projbuf, zonebuf); 784 if (flags & TASK_FINAL) 785 p->p_task->tk_flags |= TASK_FINAL; 786 787 mutex_exit(&pidlock); 788 789 fss_freebuf(zonebuf, FSS_ALLOC_ZONE); 790 fss_freebuf(projbuf, FSS_ALLOC_PROJ); 791 return (prev_tk); 792 } 793 794 /* 795 * rctl ops vectors 796 */ 797 static rctl_ops_t task_lwps_ops = { 798 rcop_no_action, 799 task_lwps_usage, 800 task_lwps_set, 801 task_lwps_test 802 }; 803 804 static rctl_ops_t task_cpu_time_ops = { 805 rcop_no_action, 806 task_cpu_time_usage, 807 rcop_no_set, 808 task_cpu_time_test 809 }; 810 811 /*ARGSUSED*/ 812 /* 813 * void task_init(void) 814 * 815 * Overview 816 * task_init() initializes task-related hashes, caches, and the task id 817 * space. Additionally, task_init() establishes p0 as a member of task0. 818 * Called by main(). 819 * 820 * Return values 821 * None. 822 * 823 * Caller's context 824 * task_init() must be called prior to MP startup. 825 */ 826 void 827 task_init(void) 828 { 829 proc_t *p = &p0; 830 mod_hash_hndl_t hndl; 831 rctl_set_t *set; 832 rctl_alloc_gp_t *gp; 833 rctl_entity_p_t e; 834 /* 835 * Initialize task_cache and taskid_space. 836 */ 837 task_cache = kmem_cache_create("task_cache", sizeof (task_t), 838 0, NULL, NULL, NULL, NULL, NULL, 0); 839 taskid_space = id_space_create("taskid_space", 0, MAX_TASKID); 840 841 /* 842 * Initialize task hash table. 843 */ 844 task_hash = mod_hash_create_idhash("task_hash", task_hash_size, 845 mod_hash_null_valdtor); 846 847 /* 848 * Initialize task-based rctls. 
849 */ 850 rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK, 851 RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX, 852 &task_lwps_ops); 853 rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK, 854 RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER | 855 RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE | 856 RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX, 857 UINT64_MAX, &task_cpu_time_ops); 858 859 /* 860 * Create task0 and place p0 in it as a member. 861 */ 862 task0p = kmem_cache_alloc(task_cache, KM_SLEEP); 863 bzero(task0p, sizeof (task_t)); 864 865 task0p->tk_tkid = id_alloc(taskid_space); 866 task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 867 task0p->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 868 task0p->tk_proj = project_hold_by_id(0, &zone0, 869 PROJECT_HOLD_INSERT); 870 task0p->tk_flags = TASK_NORMAL; 871 task0p->tk_nlwps = p->p_lwpcnt; 872 task0p->tk_zone = global_zone; 873 874 set = rctl_set_create(); 875 gp = rctl_set_init_prealloc(RCENTITY_TASK); 876 mutex_enter(&curproc->p_lock); 877 e.rcep_p.task = task0p; 878 e.rcep_t = RCENTITY_TASK; 879 task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp); 880 mutex_exit(&curproc->p_lock); 881 rctl_prealloc_destroy(gp); 882 883 (void) mod_hash_reserve(task_hash, &hndl); 884 mutex_enter(&task_hash_lock); 885 ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL); 886 if (mod_hash_insert_reserve(task_hash, 887 (mod_hash_key_t)(uintptr_t)task0p->tk_tkid, 888 (mod_hash_val_t *)task0p, hndl) != 0) { 889 mod_hash_cancel(task_hash, &hndl); 890 panic("unable to insert task %d(%p)", task0p->tk_tkid, 891 (void *)task0p); 892 } 893 mutex_exit(&task_hash_lock); 894 895 task0p->tk_memb_list = p; 896 897 /* 898 * Initialize task pointers for p0, including doubly linked list of task 899 * members. 900 */ 901 p->p_task = task0p; 902 p->p_taskprev = p->p_tasknext = p; 903 task_hold(task0p); 904 } 905