1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/exacct.h> 31 #include <sys/id_space.h> 32 #include <sys/kmem.h> 33 #include <sys/modhash.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/project.h> 37 #include <sys/rctl.h> 38 #include <sys/systm.h> 39 #include <sys/task.h> 40 #include <sys/time.h> 41 #include <sys/types.h> 42 #include <sys/zone.h> 43 #include <sys/cpuvar.h> 44 #include <sys/fss.h> 45 #include <sys/class.h> 46 #include <sys/project.h> 47 48 /* 49 * Tasks 50 * 51 * A task is a collection of processes, associated with a common project ID 52 * and related by a common initial parent. The task primarily represents a 53 * natural process sequence with known resource usage, although it can also be 54 * viewed as a convenient grouping of processes for signal delivery, processor 55 * binding, and administrative operations. 
56 * 57 * Membership and observership 58 * We can conceive of situations where processes outside of the task may wish 59 * to examine the resource usage of the task. Similarly, a number of the 60 * administrative operations on a task can be performed by processes who are 61 * not members of the task. Accordingly, we must design a locking strategy 62 * where observers of the task, who wish to examine or operate on the task, 63 * and members of task, who can perform the mentioned operations, as well as 64 * leave the task, see a consistent and correct representation of the task at 65 * all times. 66 * 67 * Locking 68 * Because the task membership is a new relation between processes, its 69 * locking becomes an additional responsibility of the pidlock/p_lock locking 70 * sequence; however, tasks closely resemble sessions and the session locking 71 * model is mostly appropriate for the interaction of tasks, processes, and 72 * procfs. 73 * 74 * kmutex_t task_hash_lock 75 * task_hash_lock is a global lock protecting the contents of the task 76 * ID-to-task pointer hash. Holders of task_hash_lock must not attempt to 77 * acquire pidlock or p_lock. 78 * uint_t tk_hold_count 79 * tk_hold_count, the number of members and observers of the current task, 80 * must be manipulated atomically. 81 * proc_t *tk_memb_list 82 * proc_t *p_tasknext 83 * proc_t *p_taskprev 84 * The task's membership list is protected by pidlock, and is therefore 85 * always acquired before any of its members' p_lock mutexes. The p_task 86 * member of the proc structure is protected by pidlock or p_lock for 87 * reading, and by both pidlock and p_lock for modification, as is done for 88 * p_sessp. The key point is that only the process can modify its p_task, 89 * and not any entity on the system. (/proc will use prlock() to prevent 90 * the process from leaving, as opposed to pidlock.) 
 * kmutex_t tk_usage_lock
 *   tk_usage_lock is a per-task lock protecting the contents of the task
 *   usage structure and tk_nlwps counter for the task.max-lwps resource
 *   control.
 */

int task_hash_size = 256;
static kmutex_t task_hash_lock;
static mod_hash_t *task_hash;

static id_space_t *taskid_space;	/* global taskid space */
static kmem_cache_t *task_cache;	/* kmem cache for task structures */

rctl_hndl_t rc_task_lwps;
rctl_hndl_t rc_task_cpu_time;

/*
 * static rctl_qty_t task_lwps_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_lwps_usage() is the usage operation for the resource control
 *   associated with the number of LWPs in a task.
 *
 * Return values
 *   The number of LWPs in the given task is returned.
 *
 * Caller's context
 *   The p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static rctl_qty_t
task_lwps_usage(rctl_t *r, proc_t *p)
{
	task_t *t;
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = p->p_task;
	/* tk_nlwps is protected by the zone's zone_nlwps_lock, not p_lock. */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	nlwps = t->tk_nlwps;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	return (nlwps);
}

/*
 * static int task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_lwps_test() is the test-if-valid-increment for the resource control
 *   for the number of LWPs in a task.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
    rctl_qty_t incr,
    uint_t flags)
{
	rctl_qty_t nlwps;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	/* A detached entity (no task) can never exceed the limit. */
	if (e->rcep_p.task == NULL)
		return (0);

	/* The caller must already hold the zone's LWP-count lock. */
	ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock)));
	nlwps = e->rcep_p.task->tk_nlwps;

	if (nlwps + incr > rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static int task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
 *   rctl_qty_t nv)
 *
 * Overview
 *   task_lwps_set() is the set operation for the task.max-lwps resource
 *   control; it records the new control value in the task's tk_nlwps_ctl.
 *
 * Return values
 *   0 is always returned.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) {

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	e->rcep_p.task->tk_nlwps_ctl = nv;
	return (0);
}

/*
 * static rctl_qty_t task_cpu_time_usage(rctl_t *r, proc_t *p)
 *
 * Overview
 *   task_cpu_time_usage() is the usage operation for the resource control
 *   associated with the total accrued CPU seconds for a task.
 *
 * Return values
 *   The number of CPU seconds consumed by the task is returned.
 *
 * Caller's context
 *   The p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static rctl_qty_t
task_cpu_time_usage(rctl_t *r, proc_t *p)
{
	task_t *t = p->p_task;

	ASSERT(MUTEX_HELD(&p->p_lock));
	return (t->tk_cpu_time);
}

/*
 * rctl_qty_t task_cpu_time_incr(task_t *t, rctl_qty_t incr)
 *
 * Overview
 *   task_cpu_time_incr() increments the amount of CPU time used
 *   by this task.
 *
 * Return values
 *   The task's new total of accrued CPU seconds when one or more whole
 *   seconds have just been accumulated by this increment; 0 otherwise.
 *
 * Caller's context
 *   This is called by the clock tick accounting function to charge
 *   CPU time to a task.
 */
rctl_qty_t
task_cpu_time_incr(task_t *t, rctl_qty_t incr)
{
	rctl_qty_t ret = 0;

	mutex_enter(&t->tk_cpu_time_lock);
	/*
	 * Accumulate raw ticks; fold whole seconds (hz ticks) into
	 * tk_cpu_time and keep the sub-second remainder in tk_cpu_ticks.
	 */
	t->tk_cpu_ticks += incr;
	if (t->tk_cpu_ticks >= hz) {
		t->tk_cpu_time += t->tk_cpu_ticks / hz;
		t->tk_cpu_ticks = t->tk_cpu_ticks % hz;
		/* Report the new total so the caller can test the rctl. */
		ret = t->tk_cpu_time;
	}
	mutex_exit(&t->tk_cpu_time_lock);

	return (ret);
}

/*
 * static int task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
 *   struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
 *
 * Overview
 *   task_cpu_time_test() is the test-if-valid-increment for the resource
 *   control for the total accrued CPU seconds for a task.
 *
 *   NOTE(review): unlike task_lwps_test(), the comparison is against "incr"
 *   alone, which suggests callers pass the accrued total (as returned by
 *   task_cpu_time_incr()) rather than a delta — confirm against callers.
 *
 * Return values
 *   0 if the threshold limit was not passed, 1 if the limit was passed.
 *
 * Caller's context
 *   p->p_lock must be held across the call.
 */
/*ARGSUSED*/
static int
task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_TASK);
	if (e->rcep_p.task == NULL)
		return (0);

	if (incr >= rcntl->rcv_value)
		return (1);

	return (0);
}

/*
 * static task_t *task_find(taskid_t id, zoneid_t zoneid)
 *
 * Overview
 *   task_find() looks up the task with the given ID in the global task hash.
 *   Passing ALL_ZONES matches a task in any zone; otherwise the task must
 *   belong to the zone with the given zoneid.
 *
 * Return values
 *   A pointer to the task, or NULL if no task with the given ID exists or it
 *   belongs to a different zone than requested.
 *
 * Caller's context
 *   task_hash_lock must be held across the call.
 */
static task_t *
task_find(taskid_t id, zoneid_t zoneid)
{
	task_t *tk;

	ASSERT(MUTEX_HELD(&task_hash_lock));

	if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND ||
	    (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id))
		return (NULL);

	return (tk);
}

/*
 * task_hold_by_id(), task_hold_by_id_zone()
 *
 * Overview
 *   task_hold_by_id() is used to take a reference on a task by its task id,
 *   supporting the various system call interfaces for obtaining resource data,
 *   delivering signals, and so forth.
 *
 * Return values
 *   Returns a pointer to the task_t with taskid_t id.  The task is returned
 *   with its hold count incremented by one.
Returns NULL if there 296 * is no task with the requested id. 297 * 298 * Caller's context 299 * Caller must not be holding task_hash_lock. No restrictions on context. 300 */ 301 task_t * 302 task_hold_by_id_zone(taskid_t id, zoneid_t zoneid) 303 { 304 task_t *tk; 305 306 mutex_enter(&task_hash_lock); 307 if ((tk = task_find(id, zoneid)) != NULL) 308 atomic_add_32(&tk->tk_hold_count, 1); 309 mutex_exit(&task_hash_lock); 310 311 return (tk); 312 } 313 314 task_t * 315 task_hold_by_id(taskid_t id) 316 { 317 zoneid_t zoneid; 318 319 if (INGLOBALZONE(curproc)) 320 zoneid = ALL_ZONES; 321 else 322 zoneid = getzoneid(); 323 return (task_hold_by_id_zone(id, zoneid)); 324 } 325 326 /* 327 * void task_hold(task_t *) 328 * 329 * Overview 330 * task_hold() is used to take an additional reference to the given task. 331 * 332 * Return values 333 * None. 334 * 335 * Caller's context 336 * No restriction on context. 337 */ 338 void 339 task_hold(task_t *tk) 340 { 341 atomic_add_32(&tk->tk_hold_count, 1); 342 } 343 344 /* 345 * void task_rele(task_t *) 346 * 347 * Overview 348 * task_rele() relinquishes a reference on the given task, which was acquired 349 * via task_hold() or task_hold_by_id(). If this is the last member or 350 * observer of the task, dispatch it for commitment via the accounting 351 * subsystem. 352 * 353 * Return values 354 * None. 355 * 356 * Caller's context 357 * Caller must not be holding the task_hash_lock. 358 * Caller's context must be acceptable for KM_SLEEP allocations. 
 */
void
task_rele(task_t *tk)
{
	mutex_enter(&task_hash_lock);
	/* Other members/observers remain: just drop our reference. */
	if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) {
		mutex_exit(&task_hash_lock);
		return;
	}

	/* Last hold gone: the task no longer counts against its project. */
	mutex_enter(&tk->tk_zone->zone_nlwps_lock);
	tk->tk_proj->kpj_ntasks--;
	mutex_exit(&tk->tk_zone->zone_nlwps_lock);

	/*
	 * Remove the ID-to-task mapping while still under task_hash_lock,
	 * so no new holds can be taken via task_hold_by_id().
	 */
	if (mod_hash_destroy(task_hash,
	    (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0)
		panic("unable to delete task %d", tk->tk_tkid);
	mutex_exit(&task_hash_lock);

	/*
	 * At this point, there are no members or observers of the task, so we
	 * can safely send it on for commitment to the accounting subsystem.
	 * The task will be destroyed in task_end() subsequent to commitment.
	 */
	(void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP);
}

/*
 * task_t *task_create(projid_t, zone *)
 *
 * Overview
 *   A process constructing a new task calls task_create() to construct and
 *   preinitialize the task for the appropriate destination project.  Only one
 *   task, the primordial task0, is not created with task_create().
 *
 * Return values
 *   A pointer to the new, preinitialized task.  (The original header said
 *   "None", but the function returns the task to its caller.)
 *
 * Caller's context
 *   Caller's context should be safe for KM_SLEEP allocations.
 *   The caller should appropriately bump the kpj_ntasks counter on the
 *   project that contains this task.
 */
task_t *
task_create(projid_t projid, zone_t *zone)
{
	task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP);
	task_t *ancestor_tk;
	taskid_t tkid;
	task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	mod_hash_hndl_t hndl;
	rctl_set_t *set = rctl_set_create();
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;

	bzero(tk, sizeof (task_t));

	tk->tk_tkid = tkid = id_alloc(taskid_space);
	tk->tk_nlwps = 0;
	tk->tk_nlwps_ctl = INT_MAX;
	tk->tk_usage = tu;
	tk->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	tk->tk_proj = project_hold_by_id(projid, zone, PROJECT_HOLD_INSERT);
	tk->tk_flags = TASK_NORMAL;

	/*
	 * Copy ancestor task's resource controls.
	 */
	zone_task_hold(zone);
	/* p_lock stabilizes curproc's p_task while we take our hold on it. */
	mutex_enter(&curproc->p_lock);
	ancestor_tk = curproc->p_task;
	task_hold(ancestor_tk);
	tk->tk_zone = zone;
	mutex_exit(&curproc->p_lock);

	/*
	 * Preallocate outside rcs_lock, then retry until the preallocation
	 * is still sufficient once the lock is held.
	 */
	for (;;) {
		gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls);

		mutex_enter(&ancestor_tk->tk_rctls->rcs_lock);
		if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp))
			break;

		mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

		rctl_prealloc_destroy(gp);
	}

	/*
	 * At this point, curproc does not have the appropriate linkage
	 * through the task to the project. So, rctl_set_dup should only
	 * copy the rctls, and leave the callbacks for later.
	 */
	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e,
	    set, gp, RCD_DUP);
	mutex_exit(&ancestor_tk->tk_rctls->rcs_lock);

	rctl_prealloc_destroy(gp);

	/*
	 * Record the ancestor task's ID for use by extended accounting.
	 */
	tu->tu_anctaskid = ancestor_tk->tk_tkid;
	task_rele(ancestor_tk);

	/*
	 * Put new task structure in the hash table.
	 * NOTE(review): the ASSERT looks up with getzoneid() (caller's zone)
	 * rather than zone->zone_id — presumably callers always create tasks
	 * in their own zone; confirm.
	 */
	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	ASSERT(task_find(tkid, getzoneid()) == NULL);
	if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid,
	    (mod_hash_val_t *)tk, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", tkid, (void *)tk);
	}
	mutex_exit(&task_hash_lock);

	return (tk);
}

/*
 * void task_attach(task_t *, proc_t *)
 *
 * Overview
 *   task_attach() is used to attach a process to a task; this operation is only
 *   performed as a result of a fork() or settaskid() system call.  The proc_t's
 *   p_tasknext and p_taskprev fields will be set such that the proc_t is a
 *   member of the doubly-linked list of proc_t's that make up the task.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held on entry.
 */
void
task_attach(task_t *tk, proc_t *p)
{
	proc_t *first, *prev;
	rctl_entity_p_t e;
	ASSERT(tk != NULL);
	ASSERT(p != NULL);
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Insert p into the task's circular doubly-linked member list. */
	if (tk->tk_memb_list == NULL) {
		p->p_tasknext = p;
		p->p_taskprev = p;
	} else {
		first = tk->tk_memb_list;
		prev = first->p_taskprev;
		first->p_taskprev = p;
		p->p_tasknext = first;
		p->p_taskprev = prev;
		prev->p_tasknext = p;
	}
	tk->tk_memb_list = p;
	task_hold(tk);
	p->p_task = tk;

	/*
	 * Now that the linkage from process to task and project is
	 * complete, do the required callbacks for the task and project
	 * rctl sets.
	 */
	e.rcep_p.proj = tk->tk_proj;
	e.rcep_t = RCENTITY_PROJECT;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL,
	    RCD_CALLBACK);

	e.rcep_p.task = tk;
	e.rcep_t = RCENTITY_TASK;
	(void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL,
	    RCD_CALLBACK);

}

/*
 * task_begin()
 *
 * Overview
 *   A process constructing a new task calls task_begin() to initialize the
 *   task, by attaching itself as a member.  The task's start time is
 *   recorded in the usage structure for extended accounting.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock must be held across the call to task_begin().
 */
void
task_begin(task_t *tk, proc_t *p)
{
	timestruc_t ts;
	task_usage_t *tu;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Stamp the task's start time under tk_usage_lock. */
	mutex_enter(&tk->tk_usage_lock);
	tu = tk->tk_usage;
	gethrestime(&ts);
	tu->tu_startsec = (uint64_t)ts.tv_sec;
	tu->tu_startnsec = (uint64_t)ts.tv_nsec;
	mutex_exit(&tk->tk_usage_lock);

	/*
	 * Join process to the task as a member.
	 */
	task_attach(tk, p);
}

/*
 * void task_detach(proc_t *)
 *
 * Overview
 *   task_detach() removes the specified process from its task.  task_detach
 *   sets the process's task membership to NULL, in anticipation of a final exit
 *   or of joining a new task.  Because task_rele() requires a context safe for
 *   KM_SLEEP allocations, a task_detach() is followed by a subsequent
 *   task_rele() once appropriate context is available.
 *
 *   Because task_detach() involves relinquishing the process's membership in
 *   the project, any observational rctls the process may have had on the task
 *   or project are destroyed.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across task_detach().
 */
void
task_detach(proc_t *p)
{
	task_t *tk = p->p_task;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(p->p_task != NULL);
	ASSERT(tk->tk_memb_list != NULL);

	/*
	 * Advance the member-list head past p; if the head still points at
	 * p afterwards, p was the sole member and the list becomes empty.
	 */
	if (tk->tk_memb_list == p)
		tk->tk_memb_list = p->p_tasknext;
	if (tk->tk_memb_list == p)
		tk->tk_memb_list = NULL;
	/* Unlink p from the circular doubly-linked list. */
	p->p_taskprev->p_tasknext = p->p_tasknext;
	p->p_tasknext->p_taskprev = p->p_taskprev;

	/* Destroy any observational rctls p held on the task and project. */
	rctl_set_tearoff(p->p_task->tk_rctls, p);
	rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p);

	p->p_task = NULL;
	p->p_tasknext = p->p_taskprev = NULL;
}

/*
 * task_change(task_t *, proc_t *)
 *
 * Overview
 *   task_change() removes the specified process from its current task.  The
 *   process is then attached to the specified task.  This routine is called
 *   from settaskid() when process is being moved to a new task.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p_lock held across task_change()
 */
void
task_change(task_t *newtk, proc_t *p)
{
	task_t *oldtk = p->p_task;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(oldtk != NULL);
	ASSERT(oldtk->tk_memb_list != NULL);

	/*
	 * Transfer this process's LWP count from the old task to the new
	 * one; each count is protected by its own zone's zone_nlwps_lock.
	 */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	oldtk->tk_nlwps -= p->p_lwpcnt;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	mutex_enter(&newtk->tk_zone->zone_nlwps_lock);
	newtk->tk_nlwps += p->p_lwpcnt;
	mutex_exit(&newtk->tk_zone->zone_nlwps_lock);

	task_detach(p);
	task_begin(newtk, p);
	/* Move p's accrued microstate accounting data to the new task. */
	exacct_move_mstate(p, oldtk, newtk);
}

/*
 * task_end()
 *
 * Overview
 *   task_end() contains the actions executed once the final member of
 *   a task has released the task, and all actions connected with the task, such
 *   as committing an accounting record to a file, are completed.  It is called
 *   by the known last consumer of the task information.
 *   Additionally,
 *   task_end() must never refer to any process in the system.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   No restrictions on context, beyond that given above.
 */
void
task_end(task_t *tk)
{
	/* No members or observers may remain when the task is destroyed. */
	ASSERT(tk->tk_hold_count == 0);

	project_rele(tk->tk_proj);
	kmem_free(tk->tk_usage, sizeof (task_usage_t));
	kmem_free(tk->tk_inherited, sizeof (task_usage_t));
	if (tk->tk_prevusage != NULL)
		kmem_free(tk->tk_prevusage, sizeof (task_usage_t));
	if (tk->tk_zoneusage != NULL)
		kmem_free(tk->tk_zoneusage, sizeof (task_usage_t));
	rctl_set_free(tk->tk_rctls);
	id_free(taskid_space, tk->tk_tkid);
	zone_task_rele(tk->tk_zone);
	kmem_cache_free(task_cache, tk);
}

/*
 * static void changeproj(proc_t *, kproject_t *, zone_t *, void *, void *)
 *
 * Overview
 *   changeproj() walks every thread of process p and switches its project
 *   pointer to kpj, taking a project hold per thread and releasing the hold
 *   on each thread's previous project.
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   pidlock and p->p_lock must be held across the call.
 */
static void
changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf,
    void *zonebuf)
{
	kproject_t *oldkpj;
	kthread_t *t;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = p->p_tlist) != NULL) {
		do {
			(void) project_hold(kpj);

			thread_lock(t);
			oldkpj = ttoproj(t);

			/*
			 * Kick this thread so that he doesn't sit
			 * on a wrong wait queue.
			 */
			if (ISWAITING(t))
				setrun_locked(t);

			/*
			 * The thread wants to go on the project wait queue, but
			 * the waitq is changing.
			 */
			if (t->t_schedflag & TS_PROJWAITQ)
				t->t_schedflag &= ~ TS_PROJWAITQ;

			t->t_proj = kpj;
			t->t_pre_sys = 1;	/* For cred update */
			thread_unlock(t);
			fss_changeproj(t, kpj, zone, projbuf, zonebuf);

			project_rele(oldkpj);
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * task_join()
 *
 * Overview
 *   task_join() contains the actions that must be executed when the first
 *   member (curproc) of a newly created task joins it.  It may never fail.
 *
 *   The caller must make sure holdlwps() is called so that all other lwps are
 *   stopped prior to calling this function.
 *
 *   NB: It returns with curproc->p_lock held.
 *
 * Return values
 *   Pointer to the old task.
 *
 * Caller's context
 *   cpu_lock must be held entering the function.  It will acquire pidlock,
 *   p_crlock and p_lock during execution.
 */
task_t *
task_join(task_t *tk, uint_t flags)
{
	proc_t *p = ttoproc(curthread);
	task_t *prev_tk;
	void *projbuf, *zonebuf;
	zone_t *zone = tk->tk_zone;
	projid_t projid = tk->tk_proj->kpj_id;
	cred_t *oldcr;

	/*
	 * We can't know for sure if holdlwps() was called, but we can check to
	 * ensure we're single-threaded.
	 */
	ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1);

	/*
	 * Changing the credential is always hard because we cannot
	 * allocate memory when holding locks but we don't know whether
	 * we need to change it.  We first get a reference to the current
	 * cred if we need to change it.  Then we create a credential
	 * with an updated project id.  Finally we install it, first
	 * releasing the reference we had on the p_cred at the time we
	 * acquired the lock the first time and later we release the
	 * reference to p_cred at the time we acquired the lock the
	 * second time.
	 */
	mutex_enter(&p->p_crlock);
	if (crgetprojid(p->p_cred) == projid)
		oldcr = NULL;
	else
		crhold(oldcr = p->p_cred);
	mutex_exit(&p->p_crlock);

	if (oldcr != NULL) {
		/* Allocate the new cred while holding no locks. */
		cred_t *newcr = crdup(oldcr);
		crsetprojid(newcr, projid);
		crfree(oldcr);

		/* Swap in the new cred; drop the reference to the old one. */
		mutex_enter(&p->p_crlock);
		oldcr = p->p_cred;
		p->p_cred = newcr;
		mutex_exit(&p->p_crlock);
		crfree(oldcr);
	}

	/*
	 * Make sure that the number of processor sets is constant
	 * across this operation.
	 */
	ASSERT(MUTEX_HELD(&cpu_lock));

	projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	mutex_enter(&p->p_lock);

	prev_tk = p->p_task;
	task_change(tk, p);

	/*
	 * Now move threads one by one to their new project.
	 */
	changeproj(p, tk->tk_proj, zone, projbuf, zonebuf);
	if (flags & TASK_FINAL)
		p->p_task->tk_flags |= TASK_FINAL;

	/* p->p_lock is deliberately retained (see NB above). */
	mutex_exit(&pidlock);

	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	return (prev_tk);
}

/*
 * rctl ops vectors
 */
static rctl_ops_t task_lwps_ops = {
	rcop_no_action,
	task_lwps_usage,
	task_lwps_set,
	task_lwps_test
};

static rctl_ops_t task_cpu_time_ops = {
	rcop_no_action,
	task_cpu_time_usage,
	rcop_no_set,
	task_cpu_time_test
};

/*ARGSUSED*/
/*
 * void task_init(void)
 *
 * Overview
 *   task_init() initializes task-related hashes, caches, and the task id
 *   space.  Additionally, task_init() establishes p0 as a member of task0.
 *   Called by main().
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   task_init() must be called prior to MP startup.
 */
void
task_init(void)
{
	proc_t *p = &p0;
	mod_hash_hndl_t hndl;
	rctl_set_t *set;
	rctl_alloc_gp_t *gp;
	rctl_entity_p_t e;

	/*
	 * Initialize task_cache and taskid_space.
	 */
	task_cache = kmem_cache_create("task_cache", sizeof (task_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
	taskid_space = id_space_create("taskid_space", 0, MAX_TASKID);

	/*
	 * Initialize task hash table.
	 */
	task_hash = mod_hash_create_idhash("task_hash", task_hash_size,
	    mod_hash_null_valdtor);

	/*
	 * Initialize task-based rctls.
	 */
	rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX,
	    &task_lwps_ops);
	rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK,
	    RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER |
	    RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE |
	    RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX,
	    UINT64_MAX, &task_cpu_time_ops);

	/*
	 * Create task0 and place p0 in it as a member.
	 */
	task0p = kmem_cache_alloc(task_cache, KM_SLEEP);
	bzero(task0p, sizeof (task_t));

	task0p->tk_tkid = id_alloc(taskid_space);
	task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	task0p->tk_inherited = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
	task0p->tk_proj = project_hold_by_id(0, &zone0,
	    PROJECT_HOLD_INSERT);
	task0p->tk_flags = TASK_NORMAL;
	task0p->tk_nlwps = p->p_lwpcnt;
	task0p->tk_zone = global_zone;

	/* Build task0's rctl set in place (no ancestor task to copy from). */
	set = rctl_set_create();
	gp = rctl_set_init_prealloc(RCENTITY_TASK);
	mutex_enter(&curproc->p_lock);
	e.rcep_p.task = task0p;
	e.rcep_t = RCENTITY_TASK;
	task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp);
	mutex_exit(&curproc->p_lock);
	rctl_prealloc_destroy(gp);

	(void) mod_hash_reserve(task_hash, &hndl);
	mutex_enter(&task_hash_lock);
	ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL);
	if (mod_hash_insert_reserve(task_hash,
	    (mod_hash_key_t)(uintptr_t)task0p->tk_tkid,
	    (mod_hash_val_t *)task0p, hndl) != 0) {
		mod_hash_cancel(task_hash, &hndl);
		panic("unable to insert task %d(%p)", task0p->tk_tkid,
		    (void *)task0p);
	}
	mutex_exit(&task_hash_lock);

	task0p->tk_memb_list = p;

	/*
	 * Initialize task pointers for p0, including doubly linked list of task
	 * members.
	 */
	p->p_task = task0p;
	p->p_taskprev = p->p_tasknext = p;
	task_hold(task0p);
}