1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/exacct.h> 31 #include <sys/id_space.h> 32 #include <sys/kmem.h> 33 #include <sys/modhash.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/project.h> 37 #include <sys/rctl.h> 38 #include <sys/systm.h> 39 #include <sys/task.h> 40 #include <sys/time.h> 41 #include <sys/types.h> 42 #include <sys/zone.h> 43 #include <sys/cpuvar.h> 44 #include <sys/fss.h> 45 #include <sys/class.h> 46 #include <sys/project.h> 47 48 /* 49 * Tasks 50 * 51 * A task is a collection of processes, associated with a common project ID 52 * and related by a common initial parent. The task primarily represents a 53 * natural process sequence with known resource usage, although it can also be 54 * viewed as a convenient grouping of processes for signal delivery, processor 55 * binding, and administrative operations. 
56 * 57 * Membership and observership 58 * We can conceive of situations where processes outside of the task may wish 59 * to examine the resource usage of the task. Similarly, a number of the 60 * administrative operations on a task can be performed by processes who are 61 * not members of the task. Accordingly, we must design a locking strategy 62 * where observers of the task, who wish to examine or operate on the task, 63 * and members of task, who can perform the mentioned operations, as well as 64 * leave the task, see a consistent and correct representation of the task at 65 * all times. 66 * 67 * Locking 68 * Because the task membership is a new relation between processes, its 69 * locking becomes an additional responsibility of the pidlock/p_lock locking 70 * sequence; however, tasks closely resemble sessions and the session locking 71 * model is mostly appropriate for the interaction of tasks, processes, and 72 * procfs. 73 * 74 * kmutex_t task_hash_lock 75 * task_hash_lock is a global lock protecting the contents of the task 76 * ID-to-task pointer hash. Holders of task_hash_lock must not attempt to 77 * acquire pidlock or p_lock. 78 * uint_t tk_hold_count 79 * tk_hold_count, the number of members and observers of the current task, 80 * must be manipulated atomically. 81 * proc_t *tk_memb_list 82 * proc_t *p_tasknext 83 * proc_t *p_taskprev 84 * The task's membership list is protected by pidlock, and is therefore 85 * always acquired before any of its members' p_lock mutexes. The p_task 86 * member of the proc structure is protected by pidlock or p_lock for 87 * reading, and by both pidlock and p_lock for modification, as is done for 88 * p_sessp. The key point is that only the process can modify its p_task, 89 * and not any entity on the system. (/proc will use prlock() to prevent 90 * the process from leaving, as opposed to pidlock.) 
 * kmutex_t tk_usage_lock
 *   tk_usage_lock is a per-task lock protecting the contents of the task
 *   usage structure and tk_nlwps counter for the task.max-lwps resource
 *   control.
 */

int task_hash_size = 256;		/* buckets in the taskid -> task hash */
static kmutex_t task_hash_lock;		/* protects task_hash contents */
static mod_hash_t *task_hash;		/* taskid_t -> task_t * hash */

static id_space_t *taskid_space;	/* global taskid space */
static kmem_cache_t *task_cache;	/* kmem cache for task structures */

rctl_hndl_t rc_task_lwps;		/* handle for task.max-lwps rctl */
rctl_hndl_t rc_task_cpu_time;		/* handle for task.max-cpu-time rctl */

/*
 * static rctl_qty_t task_lwps_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_lwps_usage() is the usage operation for the resource control
 *   associated with the number of LWPs in a task.
 *
 * Return values
 *   The number of LWPs in the given task is returned.
 *
 * Caller's context
 *   The p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static rctl_qty_t
task_lwps_usage(rctl_t *r, proc_t *p)
{
	task_t *t;
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = p->p_task;
	/* tk_nlwps is protected by the containing zone's zone_nlwps_lock. */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	nlwps = t->tk_nlwps;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	return (nlwps);
}

/*
 * static int task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_lwps_test() is the test-if-valid-increment for the resource control
 *   for the number of LWPs in a task.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
    rctl_qty_t incr,
    uint_t flags)
{
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	/* A NULL task entity cannot exceed any limit. */
	if (e->rcep_p.task == NULL)
		return (0);

	ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock)));
	nlwps = e->rcep_p.task->tk_nlwps;

	if (nlwps + incr > rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static int task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
 *   rctl_qty_t nv)
 *
 * Overview
 *   task_lwps_set() is the set operation for the task.max-lwps resource
 *   control; it records the new control value in the task's tk_nlwps_ctl.
 *
 * Return values
 *   0 is always returned.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) {

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	e->rcep_p.task->tk_nlwps_ctl = nv;
	return (0);
}

/*
 * static rctl_qty_t task_cpu_time_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_cpu_time_usage() is the usage operation for the resource control
 *   associated with the total accrued CPU seconds for a task.
 *
 * Return values
 *   The number of CPU seconds consumed by the task is returned.
 *
 * Caller's context
 *   The given task must be held across the call.
 */
/*ARGSUSED*/
static rctl_qty_t
task_cpu_time_usage(rctl_t *r, proc_t *p)
{
	task_t *t = p->p_task;

	ASSERT(MUTEX_HELD(&p->p_lock));
	/* tk_cpu_time is accrued in clock ticks; convert to seconds. */
	return (t->tk_cpu_time / hz);
}

/*
 * static int task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_cpu_time_test() is the test-if-valid-increment for the resource
 *   control for the total accrued CPU seconds for a task.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   The given task must be held across the call.
 */
/*ARGSUSED*/
static int
task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
{
	task_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	t = e->rcep_p.task;
	/* incr is in ticks; compare in seconds against the control value. */
	if ((t->tk_cpu_time + incr) / hz >= rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static task_t *task_find(taskid_t, zoneid_t)
 *
 * Overview
 *   task_find() looks up a task by ID in the global task hash, optionally
 *   restricting the match to a given zone (ALL_ZONES matches any zone).
 *
 * Return values
 *   A pointer to the task, or NULL if no task with the given ID exists or
 *   the task belongs to a different zone than the one requested.
 *
 * Caller's context
 *   task_hash_lock must be held across the call.
 */
static task_t *
task_find(taskid_t id, zoneid_t zoneid)
{
	task_t *tk;

	ASSERT(MUTEX_HELD(&task_hash_lock));

	if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND ||
	    (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id))
		return (NULL);

	return (tk);
}

/*
 * task_hold_by_id(), task_hold_by_id_zone()
 *
 * Overview
 *   task_hold_by_id() is used to take a reference on a task by its task id,
 *   supporting the various system call interfaces for obtaining resource data,
 *   delivering signals, and so forth.
 *
 * Return values
 *   Returns a pointer to the task_t with taskid_t id.  The task is returned
 *   with its hold count incremented by one.  Returns NULL if there
 *   is no task with the requested id.
 *
 * Caller's context
 *   Caller must not be holding task_hash_lock.  No restrictions on context.
 */
task_t *
task_hold_by_id_zone(taskid_t id, zoneid_t zoneid)
{
	task_t *tk;

	/*
	 * The hold is taken under task_hash_lock so that it cannot race
	 * with the final release in task_rele(), which removes the task
	 * from the hash under the same lock.
	 */
	mutex_enter(&task_hash_lock);
	if ((tk = task_find(id, zoneid)) != NULL)
		atomic_add_32(&tk->tk_hold_count, 1);
	mutex_exit(&task_hash_lock);

	return (tk);
}

task_t *
task_hold_by_id(taskid_t id)
{
	zoneid_t zoneid;

	/* Global-zone callers may look up tasks in any zone. */
	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (task_hold_by_id_zone(id, zoneid));
}

/*
 * void task_hold(task_t *)
 *
 * Overview
 *   task_hold() is used to take an additional reference to the given task.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   No restriction on context.  The caller must already hold a reference
 *   (or otherwise guarantee the task cannot be freed) for the atomic
 *   increment alone to be safe.
 */
void
task_hold(task_t *tk)
{
	atomic_add_32(&tk->tk_hold_count, 1);
}

/*
 * void task_rele(task_t *)
 *
 * Overview
 *   task_rele() relinquishes a reference on the given task, which was acquired
 *   via task_hold() or task_hold_by_id().  If this is the last member or
 *   observer of the task, dispatch it for commitment via the accounting
 *   subsystem.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Caller must not be holding the task_hash_lock.
 *   Caller's context must be acceptable for KM_SLEEP allocations.
 */
void
task_rele(task_t *tk)
{
	/*
	 * Decrement under task_hash_lock so a concurrent task_hold_by_id()
	 * cannot resurrect the task after we decide it is dead.
	 */
	mutex_enter(&task_hash_lock);
	if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) {
		mutex_exit(&task_hash_lock);
		return;
	}

	/* The task count on the project is covered by zone_nlwps_lock. */
	mutex_enter(&tk->tk_zone->zone_nlwps_lock);
	tk->tk_proj->kpj_ntasks--;
	mutex_exit(&tk->tk_zone->zone_nlwps_lock);

	if (mod_hash_destroy(task_hash,
	    (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0)
		panic("unable to delete task %d", tk->tk_tkid);
	mutex_exit(&task_hash_lock);

	/*
	 * At this point, there are no members or observers of the task, so we
	 * can safely send it on for commitment to the accounting subsystem.
	 * The task will be destroyed in task_end() subsequent to commitment.
	 */
	(void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP);
}

/*
 * task_t *task_create(projid_t, zone *)
 *
 * Overview
 *   A process constructing a new task calls task_create() to construct and
 *   preinitialize the task for the appropriate destination project.  Only one
 *   task, the primordial task0, is not created with task_create().
 *
 * Return values
 *   A pointer to the newly created task, with one hold taken on behalf of
 *   the caller (from task_hold() on the ancestor path and the initial
 *   reference semantics; the task is inserted in the task hash).
 *
 * Caller's context
 *   Caller's context should be safe for KM_SLEEP allocations.
 *   The caller should appropriately bump the kpj_ntasks counter on the
 *   project that contains this task.
 */
task_t *
task_create(projid_t projid, zone_t *zone)
{
	task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP);
	task_t *ancestor_tk;
	taskid_t tkid;
	task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	mod_hash_hndl_t hndl;
	rctl_set_t *set = rctl_set_create();
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;

	bzero(tk, sizeof (task_t));

	tk->tk_tkid = tkid = id_alloc(taskid_space);
	tk->tk_nlwps = 0;
	tk->tk_nlwps_ctl = INT_MAX;
	tk->tk_usage = tu;
	tk->tk_proj = project_hold_by_id(projid, zone, PROJECT_HOLD_INSERT);
	tk->tk_flags = TASK_NORMAL;

	/*
	 * Copy ancestor task's resource controls.  The hold on the ancestor
	 * is taken under curproc->p_lock so p_task cannot change underneath.
	 */
	zone_task_hold(zone);
	mutex_enter(&curproc->p_lock);
	ancestor_tk = curproc->p_task;
	task_hold(ancestor_tk);
	tk->tk_zone = zone;
	mutex_exit(&curproc->p_lock);

	/*
	 * Preallocate the duplicate rctl set outside of rcs_lock and retry
	 * until the preallocation is still sufficient once the lock is held.
	 */
	for (;;) {
		gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls);

		mutex_enter(&ancestor_tk->tk_rctls->rcs_lock);
		if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp))
			break;

		mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

		rctl_prealloc_destroy(gp);
	}

	/*
	 * At this point, curproc does not have the appropriate linkage
	 * through the task to the project.  So, rctl_set_dup should only
	 * copy the rctls, and leave the callbacks for later.
	 */
	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e,
	    set, gp, RCD_DUP);
	mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

	rctl_prealloc_destroy(gp);

	/*
	 * Record the ancestor task's ID for use by extended accounting.
	 */
	tu->tu_anctaskid = ancestor_tk->tk_tkid;
	task_rele(ancestor_tk);

	/*
	 * Put new task structure in the hash table.
	 */
	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	/*
	 * NOTE(review): the ASSERT checks against getzoneid() (the caller's
	 * zone) rather than zone->zone_id; these presumably coincide for all
	 * current callers -- confirm if task_create() is ever invoked for a
	 * zone other than the caller's.
	 */
	ASSERT(task_find(tkid, getzoneid()) == NULL);
	if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid,
	    (mod_hash_val_t *)tk, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", tkid, (void *)tk);
	}
	mutex_exit(&task_hash_lock);

	return (tk);
}

/*
 * void task_attach(task_t *, proc_t *)
 *
 * Overview
 *   task_attach() is used to attach a process to a task; this operation is only
 *   performed as a result of a fork() or settaskid() system call.  The proc_t's
 *   p_tasknext and p_taskprev fields will be set such that the proc_t is a
 *   member of the doubly-linked list of proc_t's that make up the task.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held on entry.
 */
void
task_attach(task_t *tk, proc_t *p)
{
	proc_t *first, *prev;
	rctl_entity_p_t e;
	ASSERT(tk != NULL);
	ASSERT(p != NULL);
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (tk->tk_memb_list == NULL) {
		/* First member: the circular list is just this process. */
		p->p_tasknext = p;
		p->p_taskprev = p;
	} else {
		/* Splice p in just before the current list head. */
		first = tk->tk_memb_list;
		prev = first->p_taskprev;
		first->p_taskprev = p;
		p->p_tasknext = first;
		p->p_taskprev = prev;
		prev->p_tasknext = p;
	}
	tk->tk_memb_list = p;
	task_hold(tk);
	p->p_task = tk;

	/*
	 * Now that the linkage from process to task and project is
	 * complete, do the required callbacks for the task and project
	 * rctl sets.
	 */
	e.rcep_p.proj = tk->tk_proj;
	e.rcep_t = RCENTITY_PROJECT;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL,
	    RCD_CALLBACK);

	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL,
	    RCD_CALLBACK);

}

/*
 * task_begin()
 *
 * Overview
 *   A process constructing a new task calls task_begin() to initialize the
 *   task, by attaching itself as a member.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock must be held across the call to task_begin().
 */
void
task_begin(task_t *tk, proc_t *p)
{
	timestruc_t ts;
	task_usage_t *tu;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Stamp the task's start time in its usage record. */
	mutex_enter(&tk->tk_usage_lock);
	tu = tk->tk_usage;
	gethrestime(&ts);
	tu->tu_startsec = (uint64_t)ts.tv_sec;
	tu->tu_startnsec = (uint64_t)ts.tv_nsec;
	mutex_exit(&tk->tk_usage_lock);

	/*
	 * Join process to the task as a member.
	 */
	task_attach(tk, p);
}

/*
 * void task_detach(proc_t *)
 *
 * Overview
 *   task_detach() removes the specified process from its task.  task_detach
 *   sets the process's task membership to NULL, in anticipation of a final exit
 *   or of joining a new task.  Because task_rele() requires a context safe for
 *   KM_SLEEP allocations, a task_detach() is followed by a subsequent
 *   task_rele() once appropriate context is available.
 *
 *   Because task_detach() involves relinquishing the process's membership in
 *   the project, any observational rctls the process may have had on the task
 *   or project are destroyed.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across task_detach().
 */
void
task_detach(proc_t *p)
{
	task_t *tk = p->p_task;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(p->p_task != NULL);
	ASSERT(tk->tk_memb_list != NULL);

	/*
	 * Advance the list head past p; if p was the sole member, the
	 * second test makes the list empty.
	 */
	if (tk->tk_memb_list == p)
		tk->tk_memb_list = p->p_tasknext;
	if (tk->tk_memb_list == p)
		tk->tk_memb_list = NULL;
	p->p_taskprev->p_tasknext = p->p_tasknext;
	p->p_tasknext->p_taskprev = p->p_taskprev;

	/* Destroy any observational rctls p held on the task and project. */
	rctl_set_tearoff(p->p_task->tk_rctls, p);
	rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p);

	p->p_task = NULL;
	p->p_tasknext = p->p_taskprev = NULL;
}

/*
 * task_change(task_t *, proc_t *)
 *
 * Overview
 *   task_change() removes the specified process from its current task.  The
 *   process is then attached to the specified task.  This routine is called
 *   from settaskid() when process is being moved to a new task.
 *
 * Return values
 *   None.
597 * 598 * Caller's context 599 * pidlock and p_lock held across task_change() 600 */ 601 void 602 task_change(task_t *newtk, proc_t *p) 603 { 604 task_t *oldtk = p->p_task; 605 606 ASSERT(MUTEX_HELD(&pidlock)); 607 ASSERT(MUTEX_HELD(&p->p_lock)); 608 ASSERT(oldtk != NULL); 609 ASSERT(oldtk->tk_memb_list != NULL); 610 611 mutex_enter(&p->p_zone->zone_nlwps_lock); 612 oldtk->tk_nlwps -= p->p_lwpcnt; 613 mutex_exit(&p->p_zone->zone_nlwps_lock); 614 615 mutex_enter(&newtk->tk_zone->zone_nlwps_lock); 616 newtk->tk_nlwps += p->p_lwpcnt; 617 mutex_exit(&newtk->tk_zone->zone_nlwps_lock); 618 619 task_detach(p); 620 task_begin(newtk, p); 621 } 622 623 /* 624 * task_end() 625 * 626 * Overview 627 * task_end() contains the actions executed once the final member of 628 * a task has released the task, and all actions connected with the task, such 629 * as committing an accounting record to a file, are completed. It is called 630 * by the known last consumer of the task information. Additionally, 631 * task_end() must never refer to any process in the system. 632 * 633 * Return values 634 * None. 635 * 636 * Caller's context 637 * No restrictions on context, beyond that given above. 
 */
void
task_end(task_t *tk)
{
	ASSERT(tk->tk_hold_count == 0);

	/* Release every resource the task referenced, then the task itself. */
	project_rele(tk->tk_proj);
	kmem_free(tk->tk_usage, sizeof (task_usage_t));
	if (tk->tk_prevusage != NULL)
		kmem_free(tk->tk_prevusage, sizeof (task_usage_t));
	if (tk->tk_zoneusage != NULL)
		kmem_free(tk->tk_zoneusage, sizeof (task_usage_t));
	rctl_set_free(tk->tk_rctls);
	id_free(taskid_space, tk->tk_tkid);
	zone_task_rele(tk->tk_zone);
	kmem_cache_free(task_cache, tk);
}

/*
 * static void changeproj(proc_t *, kproject_t *, zone_t *, void *, void *)
 *
 * Overview
 *   Walk every thread of p and rebind it from its current project to kpj,
 *   transferring the per-thread project hold and updating FSS accounting.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held.
 */
static void
changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf,
    void *zonebuf)
{
	kproject_t *oldkpj;
	kthread_t *t;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = p->p_tlist) != NULL) {
		do {
			/* A hold per thread; released for the old project. */
			(void) project_hold(kpj);

			thread_lock(t);
			oldkpj = ttoproj(t);

			/*
			 * Kick this thread so that he doesn't sit
			 * on a wrong wait queue.
			 */
			if (ISWAITING(t))
				setrun_locked(t);

			/*
			 * The thread wants to go on the project wait queue, but
			 * the waitq is changing.
			 */
			if (t->t_schedflag & TS_PROJWAITQ)
				t->t_schedflag &= ~ TS_PROJWAITQ;

			t->t_proj = kpj;
			t->t_pre_sys = 1; /* For cred update */
			thread_unlock(t);
			fss_changeproj(t, kpj, zone, projbuf, zonebuf);

			project_rele(oldkpj);
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * task_join()
 *
 * Overview
 *   task_join() contains the actions that must be executed when the first
 *   member (curproc) of a newly created task joins it.  It may never fail.
 *
 *   The caller must make sure holdlwps() is called so that all other lwps are
 *   stopped prior to calling this function.
 *
 *   NB: It returns with curproc->p_lock held.
 *
 * Return values
 *   Pointer to the old task.
 *
 * Caller's context
 *   cpu_lock must be held entering the function.
It will acquire pidlock,
 *   p_crlock and p_lock during execution.
 */
task_t *
task_join(task_t *tk, uint_t flags)
{
	proc_t *p = ttoproc(curthread);
	task_t *prev_tk;
	void *projbuf, *zonebuf;
	zone_t *zone = tk->tk_zone;
	projid_t projid = tk->tk_proj->kpj_id;
	cred_t *oldcr;

	/*
	 * We can't know for sure if holdlwps() was called, but we can check to
	 * ensure we're single-threaded.
	 */
	ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1);

	/*
	 * Changing the credential is always hard because we cannot
	 * allocate memory when holding locks but we don't know whether
	 * we need to change it.  We first get a reference to the current
	 * cred if we need to change it.  Then we create a credential
	 * with an updated project id.  Finally we install it, first
	 * releasing the reference we had on the p_cred at the time we
	 * acquired the lock the first time and later we release the
	 * reference to p_cred at the time we acquired the lock the
	 * second time.
	 */
	mutex_enter(&p->p_crlock);
	if (crgetprojid(p->p_cred) == projid)
		oldcr = NULL;
	else
		crhold(oldcr = p->p_cred);
	mutex_exit(&p->p_crlock);

	if (oldcr != NULL) {
		/* Duplicate and retarget the cred outside of p_crlock. */
		cred_t *newcr = crdup(oldcr);
		crsetprojid(newcr, projid);
		crfree(oldcr);

		mutex_enter(&p->p_crlock);
		oldcr = p->p_cred;
		p->p_cred = newcr;
		mutex_exit(&p->p_crlock);
		crfree(oldcr);
	}

	/*
	 * Make sure that the number of processor sets is constant
	 * across this operation.
	 */
	ASSERT(MUTEX_HELD(&cpu_lock));

	/* FSS buffers are allocated before taking pidlock/p_lock. */
	projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	mutex_enter(&p->p_lock);

	prev_tk = p->p_task;
	task_change(tk, p);

	/*
	 * Now move threads one by one to their new project.
	 */
	changeproj(p, tk->tk_proj, zone, projbuf, zonebuf);
	if (flags & TASK_FINAL)
		p->p_task->tk_flags |= TASK_FINAL;

	/* NB: pidlock is dropped here, but p->p_lock remains held on return. */
	mutex_exit(&pidlock);

	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	return (prev_tk);
}

/*
 * rctl ops vectors
 */
static rctl_ops_t task_lwps_ops = {
	rcop_no_action,
	task_lwps_usage,
	task_lwps_set,
	task_lwps_test
};

static rctl_ops_t task_cpu_time_ops = {
	rcop_no_action,
	task_cpu_time_usage,
	rcop_no_set,
	task_cpu_time_test
};

/*ARGSUSED*/
/*
 * void task_init(void)
 *
 * Overview
 *   task_init() initializes task-related hashes, caches, and the task id
 *   space.  Additionally, task_init() establishes p0 as a member of task0.
 *   Called by main().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   task_init() must be called prior to MP startup.
 */
void
task_init(void)
{
	proc_t *p = &p0;
	mod_hash_hndl_t hndl;
	rctl_set_t *set;
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;

	/*
	 * Initialize task_cache and taskid_space.
	 */
	task_cache = kmem_cache_create("task_cache", sizeof (task_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
	taskid_space = id_space_create("taskid_space", 0, MAX_TASKID);

	/*
	 * Initialize task hash table.
	 */
	task_hash = mod_hash_create_idhash("task_hash", task_hash_size,
	    mod_hash_null_valdtor);

	/*
	 * Initialize task-based rctls.
	 */
	rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX,
	    &task_lwps_ops);
	rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER |
	    RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE |
	    RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX,
	    UINT64_MAX, &task_cpu_time_ops);

	/*
	 * Create task0 and place p0 in it as a member.
	 */
	task0p = kmem_cache_alloc(task_cache, KM_SLEEP);
	bzero(task0p, sizeof (task_t));

	task0p->tk_tkid = id_alloc(taskid_space);
	task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	task0p->tk_proj = project_hold_by_id(0, &zone0,
	    PROJECT_HOLD_INSERT);
	task0p->tk_flags = TASK_NORMAL;
	task0p->tk_nlwps = p->p_lwpcnt;
	task0p->tk_zone = global_zone;

	set = rctl_set_create();
	gp = rctl_set_init_prealloc(RCENTITY_TASK);
	mutex_enter(&curproc->p_lock);
	e.rcep_p.task = task0p;
	e.rcep_t = RCENTITY_TASK;
	task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp);
	mutex_exit(&curproc->p_lock);
	rctl_prealloc_destroy(gp);

	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL);
	if (mod_hash_insert_reserve(task_hash,
	    (mod_hash_key_t)(uintptr_t)task0p->tk_tkid,
	    (mod_hash_val_t *)task0p, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", task0p->tk_tkid,
		    (void *)task0p);
	}
	mutex_exit(&task_hash_lock);

	task0p->tk_memb_list = p;

	/*
	 * Initialize task pointers for p0, including doubly linked list of task
	 * members.
	 */
	p->p_task = task0p;
	p->p_taskprev = p->p_tasknext = p;
	task_hold(task0p);
}