1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/atomic.h> 27 #include <sys/cmn_err.h> 28 #include <sys/exacct.h> 29 #include <sys/id_space.h> 30 #include <sys/kmem.h> 31 #include <sys/modhash.h> 32 #include <sys/mutex.h> 33 #include <sys/proc.h> 34 #include <sys/project.h> 35 #include <sys/rctl.h> 36 #include <sys/systm.h> 37 #include <sys/task.h> 38 #include <sys/time.h> 39 #include <sys/types.h> 40 #include <sys/zone.h> 41 #include <sys/cpuvar.h> 42 #include <sys/fss.h> 43 #include <sys/class.h> 44 #include <sys/project.h> 45 46 /* 47 * Tasks 48 * 49 * A task is a collection of processes, associated with a common project ID 50 * and related by a common initial parent. The task primarily represents a 51 * natural process sequence with known resource usage, although it can also be 52 * viewed as a convenient grouping of processes for signal delivery, processor 53 * binding, and administrative operations. 
54 * 55 * Membership and observership 56 * We can conceive of situations where processes outside of the task may wish 57 * to examine the resource usage of the task. Similarly, a number of the 58 * administrative operations on a task can be performed by processes who are 59 * not members of the task. Accordingly, we must design a locking strategy 60 * where observers of the task, who wish to examine or operate on the task, 61 * and members of task, who can perform the mentioned operations, as well as 62 * leave the task, see a consistent and correct representation of the task at 63 * all times. 64 * 65 * Locking 66 * Because the task membership is a new relation between processes, its 67 * locking becomes an additional responsibility of the pidlock/p_lock locking 68 * sequence; however, tasks closely resemble sessions and the session locking 69 * model is mostly appropriate for the interaction of tasks, processes, and 70 * procfs. 71 * 72 * kmutex_t task_hash_lock 73 * task_hash_lock is a global lock protecting the contents of the task 74 * ID-to-task pointer hash. Holders of task_hash_lock must not attempt to 75 * acquire pidlock or p_lock. 76 * uint_t tk_hold_count 77 * tk_hold_count, the number of members and observers of the current task, 78 * must be manipulated atomically. 79 * proc_t *tk_memb_list 80 * proc_t *p_tasknext 81 * proc_t *p_taskprev 82 * The task's membership list is protected by pidlock, and is therefore 83 * always acquired before any of its members' p_lock mutexes. The p_task 84 * member of the proc structure is protected by pidlock or p_lock for 85 * reading, and by both pidlock and p_lock for modification, as is done for 86 * p_sessp. The key point is that only the process can modify its p_task, 87 * and not any entity on the system. (/proc will use prlock() to prevent 88 * the process from leaving, as opposed to pidlock.) 
 * kmutex_t tk_usage_lock
 *   tk_usage_lock is a per-task lock protecting the contents of the task
 *   usage structure and tk_nlwps counter for the task.max-lwps resource
 *   control.
 */

int task_hash_size = 256;		/* number of buckets in task_hash */
static kmutex_t task_hash_lock;		/* protects contents of task_hash */
static mod_hash_t *task_hash;		/* task ID -> task_t * hash */

static id_space_t *taskid_space;	/* global taskid space */
static kmem_cache_t *task_cache;	/* kmem cache for task structures */

rctl_hndl_t rc_task_lwps;
rctl_hndl_t rc_task_cpu_time;

/*
 * static rctl_qty_t task_lwps_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_lwps_usage() is the usage operation for the resource control
 *   associated with the number of LWPs in a task.
 *
 * Return values
 *   The number of LWPs in the given task is returned.
 *
 * Caller's context
 *   The p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static rctl_qty_t
task_lwps_usage(rctl_t *r, proc_t *p)
{
	task_t *t;
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = p->p_task;
	/* tk_nlwps is protected by the zone's zone_nlwps_lock. */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	nlwps = t->tk_nlwps;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	return (nlwps);
}

/*
 * static int task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_lwps_test() is the test-if-valid-increment for the resource control
 *   for the number of LWPs in a task.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   p->p_lock must be held across the call, as must the zone's
 *   zone_nlwps_lock (asserted below).
 */
/*ARGSUSED*/
static int
task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
    rctl_qty_t incr,
    uint_t flags)
{
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock)));
	nlwps = e->rcep_p.task->tk_nlwps;

	if (nlwps + incr > rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static int task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
 *   rctl_qty_t nv)
 *
 * Overview
 *   task_lwps_set() records a new control value nv for the task.max-lwps
 *   resource control in the task's tk_nlwps_ctl.
 *
 * Return values
 *   0 is always returned.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) {

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	e->rcep_p.task->tk_nlwps_ctl = nv;
	return (0);
}

/*
 * static rctl_qty_t task_cpu_time_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_cpu_time_usage() is the usage operation for the resource control
 *   associated with the total accrued CPU seconds for a task.
 *
 * Return values
 *   The number of CPU seconds consumed by the task is returned.
 *
 * Caller's context
 *   The p->p_lock must be held across the call (asserted below).
 */
/*ARGSUSED*/
static rctl_qty_t
task_cpu_time_usage(rctl_t *r, proc_t *p)
{
	task_t *t = p->p_task;

	ASSERT(MUTEX_HELD(&p->p_lock));
	return (t->tk_cpu_time);
}

/*
 * rctl_qty_t task_cpu_time_incr(task_t *t, rctl_qty_t incr)
 *
 * Overview
 *   task_cpu_time_incr() increments the amount of CPU time used
 *   by this task.
 *
 * Return values
 *   The task's total accrued CPU time in seconds once a second or more
 *   has been accumulated; 0 otherwise.
 *
 * Caller's context
 *   This is called by the clock tick accounting function to charge
 *   CPU time to a task.
 */
rctl_qty_t
task_cpu_time_incr(task_t *t, rctl_qty_t incr)
{
	rctl_qty_t ret = 0;

	mutex_enter(&t->tk_cpu_time_lock);
	/*
	 * Accumulate raw clock ticks in tk_cpu_ticks; fold whole seconds
	 * into tk_cpu_time and keep only the sub-second remainder.
	 */
	t->tk_cpu_ticks += incr;
	if (t->tk_cpu_ticks >= hz) {
		t->tk_cpu_time += t->tk_cpu_ticks / hz;
		t->tk_cpu_ticks = t->tk_cpu_ticks % hz;
		ret = t->tk_cpu_time;
	}
	mutex_exit(&t->tk_cpu_time_lock);

	return (ret);
}

/*
 * static int task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_cpu_time_test() is the test-if-valid-increment for the resource
 *   control for the total accrued CPU seconds for a task.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   The given task must be held across the call.
 */
/*ARGSUSED*/
static int
task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	/*
	 * incr is the task's new total in seconds (see task_cpu_time_incr()),
	 * so it is compared directly against the control value.
	 */
	if (incr >= rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static task_t *task_find(taskid_t id, zoneid_t zoneid)
 *
 * Overview
 *   task_find() looks up the task with the given ID in task_hash,
 *   optionally restricting the match to a given zone.
 *
 * Return values
 *   A pointer to the matching task, or NULL if no task has the given ID or
 *   if the task belongs to another zone (when zoneid != ALL_ZONES).
 *
 * Caller's context
 *   task_hash_lock must be held across the call.
 */
static task_t *
task_find(taskid_t id, zoneid_t zoneid)
{
	task_t *tk;

	ASSERT(MUTEX_HELD(&task_hash_lock));

	if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND ||
	    (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id))
		return (NULL);

	return (tk);
}

/*
 * task_hold_by_id(), task_hold_by_id_zone()
 *
 * Overview
 *   task_hold_by_id() is used to take a reference on a task by its task id,
 *   supporting the various system call interfaces for obtaining resource
 *   data, delivering signals, and so forth.
 *
 * Return values
 *   Returns a pointer to the task_t with taskid_t id.  The task is returned
 *   with its hold count incremented by one.
Returns NULL if there 294 * is no task with the requested id. 295 * 296 * Caller's context 297 * Caller must not be holding task_hash_lock. No restrictions on context. 298 */ 299 task_t * 300 task_hold_by_id_zone(taskid_t id, zoneid_t zoneid) 301 { 302 task_t *tk; 303 304 mutex_enter(&task_hash_lock); 305 if ((tk = task_find(id, zoneid)) != NULL) 306 atomic_add_32(&tk->tk_hold_count, 1); 307 mutex_exit(&task_hash_lock); 308 309 return (tk); 310 } 311 312 task_t * 313 task_hold_by_id(taskid_t id) 314 { 315 zoneid_t zoneid; 316 317 if (INGLOBALZONE(curproc)) 318 zoneid = ALL_ZONES; 319 else 320 zoneid = getzoneid(); 321 return (task_hold_by_id_zone(id, zoneid)); 322 } 323 324 /* 325 * void task_hold(task_t *) 326 * 327 * Overview 328 * task_hold() is used to take an additional reference to the given task. 329 * 330 * Return values 331 * None. 332 * 333 * Caller's context 334 * No restriction on context. 335 */ 336 void 337 task_hold(task_t *tk) 338 { 339 atomic_add_32(&tk->tk_hold_count, 1); 340 } 341 342 /* 343 * void task_rele(task_t *) 344 * 345 * Overview 346 * task_rele() relinquishes a reference on the given task, which was acquired 347 * via task_hold() or task_hold_by_id(). If this is the last member or 348 * observer of the task, dispatch it for commitment via the accounting 349 * subsystem. 350 * 351 * Return values 352 * None. 353 * 354 * Caller's context 355 * Caller must not be holding the task_hash_lock. 356 * Caller's context must be acceptable for KM_SLEEP allocations. 
 */
void
task_rele(task_t *tk)
{
	mutex_enter(&task_hash_lock);
	if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) {
		/* Other members or observers remain; nothing more to do. */
		mutex_exit(&task_hash_lock);
		return;
	}

	/*
	 * Last hold dropped: retire the task from its project's task count
	 * and remove it from the ID hash (under task_hash_lock) so that no
	 * new holds can be taken via task_hold_by_id().
	 */
	mutex_enter(&tk->tk_zone->zone_nlwps_lock);
	tk->tk_proj->kpj_ntasks--;
	mutex_exit(&tk->tk_zone->zone_nlwps_lock);

	if (mod_hash_destroy(task_hash,
	    (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0)
		panic("unable to delete task %d", tk->tk_tkid);
	mutex_exit(&task_hash_lock);

	/*
	 * At this point, there are no members or observers of the task, so we
	 * can safely send it on for commitment to the accounting subsystem.
	 * The task will be destroyed in task_end() subsequent to commitment.
	 */
	(void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP);
}

/*
 * task_t *task_create(projid_t, zone *)
 *
 * Overview
 *   A process constructing a new task calls task_create() to construct and
 *   preinitialize the task for the appropriate destination project.  Only one
 *   task, the primordial task0, is not created with task_create().
 *
 * Return values
 *   A pointer to the new, preinitialized task.
 *
 * Caller's context
 *   Caller's context should be safe for KM_SLEEP allocations.
 *   The caller should appropriately bump the kpj_ntasks counter on the
 *   project that contains this task.
 */
task_t *
task_create(projid_t projid, zone_t *zone)
{
	task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP);
	task_t *ancestor_tk;
	taskid_t tkid;
	task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	mod_hash_hndl_t hndl;
	rctl_set_t *set = rctl_set_create();
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;

	bzero(tk, sizeof (task_t));

	tk->tk_tkid = tkid = id_alloc(taskid_space);
	tk->tk_nlwps = 0;
	tk->tk_nlwps_ctl = INT_MAX;
	tk->tk_usage = tu;
	tk->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	tk->tk_proj = project_hold_by_id(projid, zone, PROJECT_HOLD_INSERT);
	tk->tk_flags = TASK_NORMAL;

	/*
	 * Copy ancestor task's resource controls.  Hold curproc's current
	 * task (under p_lock) so it cannot go away while we duplicate its
	 * rctl set.
	 */
	zone_task_hold(zone);
	mutex_enter(&curproc->p_lock);
	ancestor_tk = curproc->p_task;
	task_hold(ancestor_tk);
	tk->tk_zone = zone;
	mutex_exit(&curproc->p_lock);

	/*
	 * rctl_set_dup_prealloc() must run unlocked (it can sleep); retry
	 * until the preallocation still suffices once rcs_lock is held.
	 */
	for (;;) {
		gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls);

		mutex_enter(&ancestor_tk->tk_rctls->rcs_lock);
		if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp))
			break;

		mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

		rctl_prealloc_destroy(gp);
	}

	/*
	 * At this point, curproc does not have the appropriate linkage
	 * through the task to the project. So, rctl_set_dup should only
	 * copy the rctls, and leave the callbacks for later.
	 */
	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e,
	    set, gp, RCD_DUP);
	mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

	rctl_prealloc_destroy(gp);

	/*
	 * Record the ancestor task's ID for use by extended accounting.
	 */
	tu->tu_anctaskid = ancestor_tk->tk_tkid;
	task_rele(ancestor_tk);

	/*
	 * Put new task structure in the hash table.
	 */
	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	ASSERT(task_find(tkid, zone->zone_id) == NULL);
	if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid,
	    (mod_hash_val_t *)tk, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", tkid, (void *)tk);
	}
	mutex_exit(&task_hash_lock);

	return (tk);
}

/*
 * void task_attach(task_t *, proc_t *)
 *
 * Overview
 *   task_attach() is used to attach a process to a task; this operation is
 *   only performed as a result of a fork() or settaskid() system call.  The
 *   proc_t's p_tasknext and p_taskprev fields will be set such that the
 *   proc_t is a member of the doubly-linked list of proc_t's that make up
 *   the task.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held on entry.
 */
void
task_attach(task_t *tk, proc_t *p)
{
	proc_t *first, *prev;
	rctl_entity_p_t e;
	ASSERT(tk != NULL);
	ASSERT(p != NULL);
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (tk->tk_memb_list == NULL) {
		/* First member: p becomes a singleton circular list. */
		p->p_tasknext = p;
		p->p_taskprev = p;
	} else {
		/* Insert p before the current head of the circular list. */
		first = tk->tk_memb_list;
		prev = first->p_taskprev;
		first->p_taskprev = p;
		p->p_tasknext = first;
		p->p_taskprev = prev;
		prev->p_tasknext = p;
	}
	tk->tk_memb_list = p;
	task_hold(tk);
	p->p_task = tk;

	/*
	 * Now that the linkage from process to task and project is
	 * complete, do the required callbacks for the task and project
	 * rctl sets.
	 */
	e.rcep_p.proj = tk->tk_proj;
	e.rcep_t = RCENTITY_PROJECT;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL,
	    RCD_CALLBACK);

	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL,
	    RCD_CALLBACK);

}

/*
 * task_begin()
 *
 * Overview
 *   A process constructing a new task calls task_begin() to initialize the
 *   task, by attaching itself as a member.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock must be held across the call to task_begin().
 */
void
task_begin(task_t *tk, proc_t *p)
{
	timestruc_t ts;
	task_usage_t *tu;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Record the task's start time in its usage structure. */
	mutex_enter(&tk->tk_usage_lock);
	tu = tk->tk_usage;
	gethrestime(&ts);
	tu->tu_startsec = (uint64_t)ts.tv_sec;
	tu->tu_startnsec = (uint64_t)ts.tv_nsec;
	mutex_exit(&tk->tk_usage_lock);

	/*
	 * Join process to the task as a member.
	 */
	task_attach(tk, p);
}

/*
 * void task_detach(proc_t *)
 *
 * Overview
 *   task_detach() removes the specified process from its task.  task_detach
 *   sets the process's task membership to NULL, in anticipation of a final
 *   exit or of joining a new task.  Because task_rele() requires a context
 *   safe for KM_SLEEP allocations, a task_detach() is followed by a
 *   subsequent task_rele() once appropriate context is available.
 *
 *   Because task_detach() involves relinquishing the process's membership in
 *   the project, any observational rctls the process may have had on the
 *   task or project are destroyed.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across task_detach().
590 */ 591 void 592 task_detach(proc_t *p) 593 { 594 task_t *tk = p->p_task; 595 596 ASSERT(MUTEX_HELD(&pidlock)); 597 ASSERT(MUTEX_HELD(&p->p_lock)); 598 ASSERT(p->p_task != NULL); 599 ASSERT(tk->tk_memb_list != NULL); 600 601 if (tk->tk_memb_list == p) 602 tk->tk_memb_list = p->p_tasknext; 603 if (tk->tk_memb_list == p) 604 tk->tk_memb_list = NULL; 605 p->p_taskprev->p_tasknext = p->p_tasknext; 606 p->p_tasknext->p_taskprev = p->p_taskprev; 607 608 rctl_set_tearoff(p->p_task->tk_rctls, p); 609 rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p); 610 611 p->p_task = NULL; 612 p->p_tasknext = p->p_taskprev = NULL; 613 } 614 615 /* 616 * task_change(task_t *, proc_t *) 617 * 618 * Overview 619 * task_change() removes the specified process from its current task. The 620 * process is then attached to the specified task. This routine is called 621 * from settaskid() when process is being moved to a new task. 622 * 623 * Return values 624 * None. 625 * 626 * Caller's context 627 * pidlock and p_lock held across task_change() 628 */ 629 void 630 task_change(task_t *newtk, proc_t *p) 631 { 632 task_t *oldtk = p->p_task; 633 634 ASSERT(MUTEX_HELD(&pidlock)); 635 ASSERT(MUTEX_HELD(&p->p_lock)); 636 ASSERT(oldtk != NULL); 637 ASSERT(oldtk->tk_memb_list != NULL); 638 639 mutex_enter(&oldtk->tk_zone->zone_nlwps_lock); 640 oldtk->tk_nlwps -= p->p_lwpcnt; 641 mutex_exit(&oldtk->tk_zone->zone_nlwps_lock); 642 643 mutex_enter(&newtk->tk_zone->zone_nlwps_lock); 644 newtk->tk_nlwps += p->p_lwpcnt; 645 mutex_exit(&newtk->tk_zone->zone_nlwps_lock); 646 647 task_detach(p); 648 task_begin(newtk, p); 649 exacct_move_mstate(p, oldtk, newtk); 650 } 651 652 /* 653 * task_end() 654 * 655 * Overview 656 * task_end() contains the actions executed once the final member of 657 * a task has released the task, and all actions connected with the task, such 658 * as committing an accounting record to a file, are completed. It is called 659 * by the known last consumer of the task information. 
 *   Additionally, task_end() must never refer to any process in the system.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   No restrictions on context, beyond that given above.
 */
void
task_end(task_t *tk)
{
	ASSERT(tk->tk_hold_count == 0);

	project_rele(tk->tk_proj);
	kmem_free(tk->tk_usage, sizeof (task_usage_t));
	kmem_free(tk->tk_inherited, sizeof (task_usage_t));
	if (tk->tk_prevusage != NULL)
		kmem_free(tk->tk_prevusage, sizeof (task_usage_t));
	if (tk->tk_zoneusage != NULL)
		kmem_free(tk->tk_zoneusage, sizeof (task_usage_t));
	rctl_set_free(tk->tk_rctls);
	id_free(taskid_space, tk->tk_tkid);
	zone_task_rele(tk->tk_zone);
	kmem_cache_free(task_cache, tk);
}

/*
 * static void changeproj(proc_t *p, kproject_t *kpj, zone_t *zone,
 *   void *projbuf, void *zonebuf)
 *
 * Overview
 *   changeproj() moves every thread of process p to the new project kpj,
 *   taking a hold on the new project for each thread and releasing the hold
 *   each thread had on its previous project.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held across the call (asserted below).
 */
static void
changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf,
    void *zonebuf)
{
	kproject_t *oldkpj;
	kthread_t *t;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = p->p_tlist) != NULL) {
		do {
			(void) project_hold(kpj);

			thread_lock(t);
			oldkpj = ttoproj(t);

			/*
			 * Kick this thread so that he doesn't sit
			 * on a wrong wait queue.
			 */
			if (ISWAITING(t))
				setrun_locked(t);

			/*
			 * The thread wants to go on the project wait queue,
			 * but the waitq is changing.
			 */
			if (t->t_schedflag & TS_PROJWAITQ)
				t->t_schedflag &= ~ TS_PROJWAITQ;

			t->t_proj = kpj;
			t->t_pre_sys = 1;	/* For cred update */
			thread_unlock(t);
			fss_changeproj(t, kpj, zone, projbuf, zonebuf);

			project_rele(oldkpj);
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * task_join()
 *
 * Overview
 *   task_join() contains the actions that must be executed when the first
 *   member (curproc) of a newly created task joins it.  It may never fail.
 *
 *   The caller must make sure holdlwps() is called so that all other lwps
 *   are stopped prior to calling this function.
 *
 *   NB: It returns with curproc->p_lock held.
 *
 * Return values
 *   Pointer to the old task.
 *
 * Caller's context
 *   cpu_lock must be held entering the function.  It will acquire pidlock,
 *   p_crlock and p_lock during execution.
 */
task_t *
task_join(task_t *tk, uint_t flags)
{
	proc_t *p = ttoproc(curthread);
	task_t *prev_tk;
	void *projbuf, *zonebuf;
	zone_t *zone = tk->tk_zone;
	projid_t projid = tk->tk_proj->kpj_id;
	cred_t *oldcr;

	/*
	 * We can't know for sure if holdlwps() was called, but we can check to
	 * ensure we're single-threaded.
	 */
	ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1);

	/*
	 * Changing the credential is always hard because we cannot
	 * allocate memory when holding locks but we don't know whether
	 * we need to change it.  We first get a reference to the current
	 * cred if we need to change it.  Then we create a credential
	 * with an updated project id.  Finally we install it, first
	 * releasing the reference we had on the p_cred at the time we
	 * acquired the lock the first time and later we release the
	 * reference to p_cred at the time we acquired the lock the
	 * second time.
	 */
	mutex_enter(&p->p_crlock);
	if (crgetprojid(p->p_cred) == projid)
		oldcr = NULL;
	else
		crhold(oldcr = p->p_cred);
	mutex_exit(&p->p_crlock);

	if (oldcr != NULL) {
		/* Build and install a duplicate cred with the new projid. */
		cred_t *newcr = crdup(oldcr);
		crsetprojid(newcr, projid);
		crfree(oldcr);

		mutex_enter(&p->p_crlock);
		oldcr = p->p_cred;
		p->p_cred = newcr;
		mutex_exit(&p->p_crlock);
		crfree(oldcr);
	}

	/*
	 * Make sure that the number of processor sets is constant
	 * across this operation.
	 */
	ASSERT(MUTEX_HELD(&cpu_lock));

	projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	mutex_enter(&p->p_lock);

	prev_tk = p->p_task;
	task_change(tk, p);

	/*
	 * Now move threads one by one to their new project.
	 */
	changeproj(p, tk->tk_proj, zone, projbuf, zonebuf);
	if (flags & TASK_FINAL)
		p->p_task->tk_flags |= TASK_FINAL;

	/* NB: p->p_lock is deliberately left held for the caller. */
	mutex_exit(&pidlock);

	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	return (prev_tk);
}

/*
 * rctl ops vectors
 */
static rctl_ops_t task_lwps_ops = {
	rcop_no_action,
	task_lwps_usage,
	task_lwps_set,
	task_lwps_test
};

static rctl_ops_t task_cpu_time_ops = {
	rcop_no_action,
	task_cpu_time_usage,
	rcop_no_set,
	task_cpu_time_test
};

/*ARGSUSED*/
/*
 * void task_init(void)
 *
 * Overview
 *   task_init() initializes task-related hashes, caches, and the task id
 *   space.  Additionally, task_init() establishes p0 as a member of task0.
 *   Called by main().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   task_init() must be called prior to MP startup.
 */
void
task_init(void)
{
	proc_t *p = &p0;
	mod_hash_hndl_t hndl;
	rctl_set_t *set;
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;
	/*
	 * Initialize task_cache and taskid_space.
	 */
	task_cache = kmem_cache_create("task_cache", sizeof (task_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
	taskid_space = id_space_create("taskid_space", 0, MAX_TASKID);

	/*
	 * Initialize task hash table.
	 */
	task_hash = mod_hash_create_idhash("task_hash", task_hash_size,
	    mod_hash_null_valdtor);

	/*
	 * Initialize task-based rctls.
	 */
	rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX,
	    &task_lwps_ops);
	rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER |
	    RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE |
	    RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX,
	    UINT64_MAX, &task_cpu_time_ops);

	/*
	 * Create task0 and place p0 in it as a member.
	 */
	task0p = kmem_cache_alloc(task_cache, KM_SLEEP);
	bzero(task0p, sizeof (task_t));

	task0p->tk_tkid = id_alloc(taskid_space);
	task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	task0p->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	task0p->tk_proj = project_hold_by_id(0, &zone0,
	    PROJECT_HOLD_INSERT);
	task0p->tk_flags = TASK_NORMAL;
	task0p->tk_nlwps = p->p_lwpcnt;
	task0p->tk_zone = global_zone;

	/* Give task0 an rctl set of its own (no ancestor to duplicate). */
	set = rctl_set_create();
	gp = rctl_set_init_prealloc(RCENTITY_TASK);
	mutex_enter(&curproc->p_lock);
	e.rcep_p.task = task0p;
	e.rcep_t = RCENTITY_TASK;
	task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp);
	mutex_exit(&curproc->p_lock);
	rctl_prealloc_destroy(gp);

	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL);
	if (mod_hash_insert_reserve(task_hash,
	    (mod_hash_key_t)(uintptr_t)task0p->tk_tkid,
	    (mod_hash_val_t *)task0p, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", task0p->tk_tkid,
		    (void *)task0p);
	}
	mutex_exit(&task_hash_lock);

	task0p->tk_memb_list = p;

	/*
	 * Initialize task pointers for p0, including doubly linked list of
	 * task members.
	 */
	p->p_task = task0p;
	p->p_taskprev = p->p_tasknext = p;
	task_hold(task0p);
}