17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50209230bSgjelinek * Common Development and Distribution License (the "License"). 60209230bSgjelinek * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*c97ad5cdSakolb * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 297c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 307c478bd9Sstevel@tonic-gate #include <sys/exacct.h> 317c478bd9Sstevel@tonic-gate #include <sys/id_space.h> 327c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 337c478bd9Sstevel@tonic-gate #include <sys/modhash.h> 347c478bd9Sstevel@tonic-gate #include <sys/mutex.h> 357c478bd9Sstevel@tonic-gate #include <sys/proc.h> 367c478bd9Sstevel@tonic-gate #include <sys/project.h> 377c478bd9Sstevel@tonic-gate #include <sys/rctl.h> 387c478bd9Sstevel@tonic-gate #include <sys/systm.h> 397c478bd9Sstevel@tonic-gate #include <sys/task.h> 407c478bd9Sstevel@tonic-gate #include <sys/time.h> 417c478bd9Sstevel@tonic-gate #include <sys/types.h> 427c478bd9Sstevel@tonic-gate #include <sys/zone.h> 437c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 447c478bd9Sstevel@tonic-gate #include <sys/fss.h> 457c478bd9Sstevel@tonic-gate #include <sys/class.h> 467c478bd9Sstevel@tonic-gate #include <sys/project.h> 477c478bd9Sstevel@tonic-gate 487c478bd9Sstevel@tonic-gate /* 497c478bd9Sstevel@tonic-gate * Tasks 507c478bd9Sstevel@tonic-gate * 517c478bd9Sstevel@tonic-gate * A task is a collection of processes, associated with a common project ID 527c478bd9Sstevel@tonic-gate * and related by a common initial parent. The task primarily represents a 537c478bd9Sstevel@tonic-gate * natural process sequence with known resource usage, although it can also be 547c478bd9Sstevel@tonic-gate * viewed as a convenient grouping of processes for signal delivery, processor 557c478bd9Sstevel@tonic-gate * binding, and administrative operations. 
 *
 * Membership and observership
 *   We can conceive of situations where processes outside of the task may wish
 *   to examine the resource usage of the task.  Similarly, a number of the
 *   administrative operations on a task can be performed by processes that are
 *   not members of the task.  Accordingly, we must design a locking strategy
 *   where observers of the task, who wish to examine or operate on the task,
 *   and members of the task, who can perform the mentioned operations, as well
 *   as leave the task, see a consistent and correct representation of the task
 *   at all times.
 *
 * Locking
 *   Because the task membership is a new relation between processes, its
 *   locking becomes an additional responsibility of the pidlock/p_lock locking
 *   sequence; however, tasks closely resemble sessions and the session locking
 *   model is mostly appropriate for the interaction of tasks, processes, and
 *   procfs.
 *
 *   kmutex_t task_hash_lock
 *     task_hash_lock is a global lock protecting the contents of the task
 *     ID-to-task pointer hash.  Holders of task_hash_lock must not attempt to
 *     acquire pidlock or p_lock.
 *   uint_t tk_hold_count
 *     tk_hold_count, the number of members and observers of the current task,
 *     must be manipulated atomically.
 *   proc_t *tk_memb_list
 *   proc_t *p_tasknext
 *   proc_t *p_taskprev
 *     The task's membership list is protected by pidlock, which is therefore
 *     always acquired before any of its members' p_lock mutexes.  The p_task
 *     member of the proc structure is protected by pidlock or p_lock for
 *     reading, and by both pidlock and p_lock for modification, as is done for
 *     p_sessp.  The key point is that only the process can modify its p_task,
 *     and not any other entity on the system.  (/proc will use prlock() to
 *     prevent the process from leaving, as opposed to pidlock.)
 *   kmutex_t tk_usage_lock
 *     tk_usage_lock is a per-task lock protecting the contents of the task
 *     usage structure.  Note that the tk_nlwps counter for the task.max-lwps
 *     resource control is read and updated under the owning zone's
 *     zone_nlwps_lock by the routines in this file.
957c478bd9Sstevel@tonic-gate */ 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate int task_hash_size = 256; 987c478bd9Sstevel@tonic-gate static kmutex_t task_hash_lock; 997c478bd9Sstevel@tonic-gate static mod_hash_t *task_hash; 1007c478bd9Sstevel@tonic-gate 1017c478bd9Sstevel@tonic-gate static id_space_t *taskid_space; /* global taskid space */ 1027c478bd9Sstevel@tonic-gate static kmem_cache_t *task_cache; /* kmem cache for task structures */ 1037c478bd9Sstevel@tonic-gate 1047c478bd9Sstevel@tonic-gate rctl_hndl_t rc_task_lwps; 1057c478bd9Sstevel@tonic-gate rctl_hndl_t rc_task_cpu_time; 1067c478bd9Sstevel@tonic-gate 1077c478bd9Sstevel@tonic-gate /* 1087c478bd9Sstevel@tonic-gate * static rctl_qty_t task_usage_lwps(void *taskp) 1097c478bd9Sstevel@tonic-gate * 1107c478bd9Sstevel@tonic-gate * Overview 1117c478bd9Sstevel@tonic-gate * task_usage_lwps() is the usage operation for the resource control 1127c478bd9Sstevel@tonic-gate * associated with the number of LWPs in a task. 1137c478bd9Sstevel@tonic-gate * 1147c478bd9Sstevel@tonic-gate * Return values 1157c478bd9Sstevel@tonic-gate * The number of LWPs in the given task is returned. 1167c478bd9Sstevel@tonic-gate * 1177c478bd9Sstevel@tonic-gate * Caller's context 1187c478bd9Sstevel@tonic-gate * The p->p_lock must be held across the call. 
1197c478bd9Sstevel@tonic-gate */ 1207c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1217c478bd9Sstevel@tonic-gate static rctl_qty_t 1227c478bd9Sstevel@tonic-gate task_lwps_usage(rctl_t *r, proc_t *p) 1237c478bd9Sstevel@tonic-gate { 1247c478bd9Sstevel@tonic-gate task_t *t; 1257c478bd9Sstevel@tonic-gate rctl_qty_t nlwps; 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate t = p->p_task; 1307c478bd9Sstevel@tonic-gate mutex_enter(&p->p_zone->zone_nlwps_lock); 1317c478bd9Sstevel@tonic-gate nlwps = t->tk_nlwps; 1327c478bd9Sstevel@tonic-gate mutex_exit(&p->p_zone->zone_nlwps_lock); 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate return (nlwps); 1357c478bd9Sstevel@tonic-gate } 1367c478bd9Sstevel@tonic-gate 1377c478bd9Sstevel@tonic-gate /* 1387c478bd9Sstevel@tonic-gate * static int task_test_lwps(void *taskp, rctl_val_t *, int64_t incr, 1397c478bd9Sstevel@tonic-gate * int flags) 1407c478bd9Sstevel@tonic-gate * 1417c478bd9Sstevel@tonic-gate * Overview 1427c478bd9Sstevel@tonic-gate * task_test_lwps() is the test-if-valid-increment for the resource control 1437c478bd9Sstevel@tonic-gate * for the number of processes in a task. 1447c478bd9Sstevel@tonic-gate * 1457c478bd9Sstevel@tonic-gate * Return values 1467c478bd9Sstevel@tonic-gate * 0 if the threshold limit was not passed, 1 if the limit was passed. 1477c478bd9Sstevel@tonic-gate * 1487c478bd9Sstevel@tonic-gate * Caller's context 1497c478bd9Sstevel@tonic-gate * p->p_lock must be held across the call. 
1507c478bd9Sstevel@tonic-gate */ 1517c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1527c478bd9Sstevel@tonic-gate static int 1537c478bd9Sstevel@tonic-gate task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, 1547c478bd9Sstevel@tonic-gate rctl_qty_t incr, 1557c478bd9Sstevel@tonic-gate uint_t flags) 1567c478bd9Sstevel@tonic-gate { 1577c478bd9Sstevel@tonic-gate rctl_qty_t nlwps; 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 1607c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 1617c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 1627c478bd9Sstevel@tonic-gate return (0); 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock))); 1657c478bd9Sstevel@tonic-gate nlwps = e->rcep_p.task->tk_nlwps; 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate if (nlwps + incr > rcntl->rcv_value) 1687c478bd9Sstevel@tonic-gate return (1); 1697c478bd9Sstevel@tonic-gate 1707c478bd9Sstevel@tonic-gate return (0); 1717c478bd9Sstevel@tonic-gate } 1727c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1737c478bd9Sstevel@tonic-gate static int 1747c478bd9Sstevel@tonic-gate task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) { 1757c478bd9Sstevel@tonic-gate 1767c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 1777c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 1787c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 1797c478bd9Sstevel@tonic-gate return (0); 1807c478bd9Sstevel@tonic-gate 1817c478bd9Sstevel@tonic-gate e->rcep_p.task->tk_nlwps_ctl = nv; 1827c478bd9Sstevel@tonic-gate return (0); 1837c478bd9Sstevel@tonic-gate } 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate /* 1867c478bd9Sstevel@tonic-gate * static rctl_qty_t task_usage_cpu_secs(void *taskp) 1877c478bd9Sstevel@tonic-gate * 1887c478bd9Sstevel@tonic-gate * Overview 1897c478bd9Sstevel@tonic-gate * task_usage_cpu_secs() is the usage 
operation for the resource control 1907c478bd9Sstevel@tonic-gate * associated with the total accrued CPU seconds for a task. 1917c478bd9Sstevel@tonic-gate * 1927c478bd9Sstevel@tonic-gate * Return values 1937c478bd9Sstevel@tonic-gate * The number of CPU seconds consumed by the task is returned. 1947c478bd9Sstevel@tonic-gate * 1957c478bd9Sstevel@tonic-gate * Caller's context 1967c478bd9Sstevel@tonic-gate * The given task must be held across the call. 1977c478bd9Sstevel@tonic-gate */ 1987c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1997c478bd9Sstevel@tonic-gate static rctl_qty_t 2007c478bd9Sstevel@tonic-gate task_cpu_time_usage(rctl_t *r, proc_t *p) 2017c478bd9Sstevel@tonic-gate { 2027c478bd9Sstevel@tonic-gate task_t *t = p->p_task; 2037c478bd9Sstevel@tonic-gate 2047c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 2057c478bd9Sstevel@tonic-gate return (t->tk_cpu_time / hz); 2067c478bd9Sstevel@tonic-gate } 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate /* 2097c478bd9Sstevel@tonic-gate * static int task_test_cpu_secs(void *taskp, rctl_val_t *, int64_t incr, 2107c478bd9Sstevel@tonic-gate * int flags) 2117c478bd9Sstevel@tonic-gate * 2127c478bd9Sstevel@tonic-gate * Overview 2137c478bd9Sstevel@tonic-gate * task_test_cpu_secs() is the test-if-valid-increment for the resource 2147c478bd9Sstevel@tonic-gate * control for the total accrued CPU seconds for a task. 2157c478bd9Sstevel@tonic-gate * 2167c478bd9Sstevel@tonic-gate * Return values 2177c478bd9Sstevel@tonic-gate * 0 if the threshold limit was not passed, 1 if the limit was passed. 2187c478bd9Sstevel@tonic-gate * 2197c478bd9Sstevel@tonic-gate * Caller's context 2207c478bd9Sstevel@tonic-gate * The given task must be held across the call. 
2217c478bd9Sstevel@tonic-gate */ 2227c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 2237c478bd9Sstevel@tonic-gate static int 2247c478bd9Sstevel@tonic-gate task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, 2257c478bd9Sstevel@tonic-gate struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags) 2267c478bd9Sstevel@tonic-gate { 2277c478bd9Sstevel@tonic-gate task_t *t; 2287c478bd9Sstevel@tonic-gate 2297c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 2307c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 2317c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 2327c478bd9Sstevel@tonic-gate return (0); 2337c478bd9Sstevel@tonic-gate 2347c478bd9Sstevel@tonic-gate t = e->rcep_p.task; 2357c478bd9Sstevel@tonic-gate if ((t->tk_cpu_time + incr) / hz >= rcntl->rcv_value) 2367c478bd9Sstevel@tonic-gate return (1); 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate return (0); 2397c478bd9Sstevel@tonic-gate } 2407c478bd9Sstevel@tonic-gate 2417c478bd9Sstevel@tonic-gate static task_t * 2427c478bd9Sstevel@tonic-gate task_find(taskid_t id, zoneid_t zoneid) 2437c478bd9Sstevel@tonic-gate { 2447c478bd9Sstevel@tonic-gate task_t *tk; 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&task_hash_lock)); 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id, 2497c478bd9Sstevel@tonic-gate (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND || 2507c478bd9Sstevel@tonic-gate (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id)) 2517c478bd9Sstevel@tonic-gate return (NULL); 2527c478bd9Sstevel@tonic-gate 2537c478bd9Sstevel@tonic-gate return (tk); 2547c478bd9Sstevel@tonic-gate } 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate /* 2577c478bd9Sstevel@tonic-gate * task_hold_by_id(), task_hold_by_id_zone() 2587c478bd9Sstevel@tonic-gate * 2597c478bd9Sstevel@tonic-gate * Overview 2607c478bd9Sstevel@tonic-gate * task_hold_by_id() is used to take a reference on a task by its task id, 
2617c478bd9Sstevel@tonic-gate * supporting the various system call interfaces for obtaining resource data, 2627c478bd9Sstevel@tonic-gate * delivering signals, and so forth. 2637c478bd9Sstevel@tonic-gate * 2647c478bd9Sstevel@tonic-gate * Return values 2657c478bd9Sstevel@tonic-gate * Returns a pointer to the task_t with taskid_t id. The task is returned 2667c478bd9Sstevel@tonic-gate * with its hold count incremented by one. Returns NULL if there 2677c478bd9Sstevel@tonic-gate * is no task with the requested id. 2687c478bd9Sstevel@tonic-gate * 2697c478bd9Sstevel@tonic-gate * Caller's context 2707c478bd9Sstevel@tonic-gate * Caller must not be holding task_hash_lock. No restrictions on context. 2717c478bd9Sstevel@tonic-gate */ 2727c478bd9Sstevel@tonic-gate task_t * 2737c478bd9Sstevel@tonic-gate task_hold_by_id_zone(taskid_t id, zoneid_t zoneid) 2747c478bd9Sstevel@tonic-gate { 2757c478bd9Sstevel@tonic-gate task_t *tk; 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 2787c478bd9Sstevel@tonic-gate if ((tk = task_find(id, zoneid)) != NULL) 2797c478bd9Sstevel@tonic-gate atomic_add_32(&tk->tk_hold_count, 1); 2807c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate return (tk); 2837c478bd9Sstevel@tonic-gate } 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate task_t * 2867c478bd9Sstevel@tonic-gate task_hold_by_id(taskid_t id) 2877c478bd9Sstevel@tonic-gate { 2887c478bd9Sstevel@tonic-gate zoneid_t zoneid; 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate if (INGLOBALZONE(curproc)) 2917c478bd9Sstevel@tonic-gate zoneid = ALL_ZONES; 2927c478bd9Sstevel@tonic-gate else 2937c478bd9Sstevel@tonic-gate zoneid = getzoneid(); 2947c478bd9Sstevel@tonic-gate return (task_hold_by_id_zone(id, zoneid)); 2957c478bd9Sstevel@tonic-gate } 2967c478bd9Sstevel@tonic-gate 2977c478bd9Sstevel@tonic-gate /* 2987c478bd9Sstevel@tonic-gate * void task_hold(task_t *) 
2997c478bd9Sstevel@tonic-gate * 3007c478bd9Sstevel@tonic-gate * Overview 3017c478bd9Sstevel@tonic-gate * task_hold() is used to take an additional reference to the given task. 3027c478bd9Sstevel@tonic-gate * 3037c478bd9Sstevel@tonic-gate * Return values 3047c478bd9Sstevel@tonic-gate * None. 3057c478bd9Sstevel@tonic-gate * 3067c478bd9Sstevel@tonic-gate * Caller's context 3077c478bd9Sstevel@tonic-gate * No restriction on context. 3087c478bd9Sstevel@tonic-gate */ 3097c478bd9Sstevel@tonic-gate void 3107c478bd9Sstevel@tonic-gate task_hold(task_t *tk) 3117c478bd9Sstevel@tonic-gate { 3127c478bd9Sstevel@tonic-gate atomic_add_32(&tk->tk_hold_count, 1); 3137c478bd9Sstevel@tonic-gate } 3147c478bd9Sstevel@tonic-gate 3157c478bd9Sstevel@tonic-gate /* 3167c478bd9Sstevel@tonic-gate * void task_rele(task_t *) 3177c478bd9Sstevel@tonic-gate * 3187c478bd9Sstevel@tonic-gate * Overview 3197c478bd9Sstevel@tonic-gate * task_rele() relinquishes a reference on the given task, which was acquired 3207c478bd9Sstevel@tonic-gate * via task_hold() or task_hold_by_id(). If this is the last member or 3217c478bd9Sstevel@tonic-gate * observer of the task, dispatch it for commitment via the accounting 3227c478bd9Sstevel@tonic-gate * subsystem. 3237c478bd9Sstevel@tonic-gate * 3247c478bd9Sstevel@tonic-gate * Return values 3257c478bd9Sstevel@tonic-gate * None. 3267c478bd9Sstevel@tonic-gate * 3277c478bd9Sstevel@tonic-gate * Caller's context 3287c478bd9Sstevel@tonic-gate * Caller must not be holding the task_hash_lock. 3297c478bd9Sstevel@tonic-gate * Caller's context must be acceptable for KM_SLEEP allocations. 
3307c478bd9Sstevel@tonic-gate */ 3317c478bd9Sstevel@tonic-gate void 3327c478bd9Sstevel@tonic-gate task_rele(task_t *tk) 3337c478bd9Sstevel@tonic-gate { 3347c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 3357c478bd9Sstevel@tonic-gate if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) { 3367c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 3377c478bd9Sstevel@tonic-gate return; 3387c478bd9Sstevel@tonic-gate } 3397c478bd9Sstevel@tonic-gate 3407c478bd9Sstevel@tonic-gate mutex_enter(&tk->tk_zone->zone_nlwps_lock); 3417c478bd9Sstevel@tonic-gate tk->tk_proj->kpj_ntasks--; 3427c478bd9Sstevel@tonic-gate mutex_exit(&tk->tk_zone->zone_nlwps_lock); 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate if (mod_hash_destroy(task_hash, 3457c478bd9Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0) 3467c478bd9Sstevel@tonic-gate panic("unable to delete task %d", tk->tk_tkid); 3477c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 3487c478bd9Sstevel@tonic-gate 3497c478bd9Sstevel@tonic-gate /* 3507c478bd9Sstevel@tonic-gate * At this point, there are no members or observers of the task, so we 3517c478bd9Sstevel@tonic-gate * can safely send it on for commitment to the accounting subsystem. 3527c478bd9Sstevel@tonic-gate * The task will be destroyed in task_end() subsequent to commitment. 3537c478bd9Sstevel@tonic-gate */ 3547c478bd9Sstevel@tonic-gate (void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP); 3557c478bd9Sstevel@tonic-gate } 3567c478bd9Sstevel@tonic-gate 3577c478bd9Sstevel@tonic-gate /* 3587c478bd9Sstevel@tonic-gate * task_t *task_create(projid_t, zone *) 3597c478bd9Sstevel@tonic-gate * 3607c478bd9Sstevel@tonic-gate * Overview 3617c478bd9Sstevel@tonic-gate * A process constructing a new task calls task_create() to construct and 3627c478bd9Sstevel@tonic-gate * preinitialize the task for the appropriate destination project. Only one 3637c478bd9Sstevel@tonic-gate * task, the primordial task0, is not created with task_create(). 
3647c478bd9Sstevel@tonic-gate * 3657c478bd9Sstevel@tonic-gate * Return values 3667c478bd9Sstevel@tonic-gate * None. 3677c478bd9Sstevel@tonic-gate * 3687c478bd9Sstevel@tonic-gate * Caller's context 3697c478bd9Sstevel@tonic-gate * Caller's context should be safe for KM_SLEEP allocations. 3707c478bd9Sstevel@tonic-gate * The caller should appropriately bump the kpj_ntasks counter on the 3717c478bd9Sstevel@tonic-gate * project that contains this task. 3727c478bd9Sstevel@tonic-gate */ 3737c478bd9Sstevel@tonic-gate task_t * 3747c478bd9Sstevel@tonic-gate task_create(projid_t projid, zone_t *zone) 3757c478bd9Sstevel@tonic-gate { 3767c478bd9Sstevel@tonic-gate task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP); 3777c478bd9Sstevel@tonic-gate task_t *ancestor_tk; 3787c478bd9Sstevel@tonic-gate taskid_t tkid; 3797c478bd9Sstevel@tonic-gate task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 3807c478bd9Sstevel@tonic-gate mod_hash_hndl_t hndl; 3817c478bd9Sstevel@tonic-gate rctl_set_t *set = rctl_set_create(); 3827c478bd9Sstevel@tonic-gate rctl_alloc_gp_t *gp; 3837c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 3847c478bd9Sstevel@tonic-gate 3857c478bd9Sstevel@tonic-gate bzero(tk, sizeof (task_t)); 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate tk->tk_tkid = tkid = id_alloc(taskid_space); 3887c478bd9Sstevel@tonic-gate tk->tk_nlwps = 0; 3897c478bd9Sstevel@tonic-gate tk->tk_nlwps_ctl = INT_MAX; 3907c478bd9Sstevel@tonic-gate tk->tk_usage = tu; 391*c97ad5cdSakolb tk->tk_proj = project_hold_by_id(projid, zone, PROJECT_HOLD_INSERT); 3927c478bd9Sstevel@tonic-gate tk->tk_flags = TASK_NORMAL; 3937c478bd9Sstevel@tonic-gate 3947c478bd9Sstevel@tonic-gate /* 3957c478bd9Sstevel@tonic-gate * Copy ancestor task's resource controls. 
3967c478bd9Sstevel@tonic-gate */ 3977c478bd9Sstevel@tonic-gate zone_task_hold(zone); 3987c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 3997c478bd9Sstevel@tonic-gate ancestor_tk = curproc->p_task; 4007c478bd9Sstevel@tonic-gate task_hold(ancestor_tk); 4017c478bd9Sstevel@tonic-gate tk->tk_zone = zone; 4027c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate for (;;) { 4057c478bd9Sstevel@tonic-gate gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls); 4067c478bd9Sstevel@tonic-gate 4077c478bd9Sstevel@tonic-gate mutex_enter(&ancestor_tk->tk_rctls->rcs_lock); 4087c478bd9Sstevel@tonic-gate if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp)) 4097c478bd9Sstevel@tonic-gate break; 4107c478bd9Sstevel@tonic-gate 4117c478bd9Sstevel@tonic-gate mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 4147c478bd9Sstevel@tonic-gate } 4157c478bd9Sstevel@tonic-gate 4167c478bd9Sstevel@tonic-gate /* 4177c478bd9Sstevel@tonic-gate * At this point, curproc does not have the appropriate linkage 4187c478bd9Sstevel@tonic-gate * through the task to the project. So, rctl_set_dup should only 4197c478bd9Sstevel@tonic-gate * copy the rctls, and leave the callbacks for later. 4207c478bd9Sstevel@tonic-gate */ 4217c478bd9Sstevel@tonic-gate e.rcep_p.task = tk; 4227c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 4237c478bd9Sstevel@tonic-gate tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e, 4247c478bd9Sstevel@tonic-gate set, gp, RCD_DUP); 4257c478bd9Sstevel@tonic-gate mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 4267c478bd9Sstevel@tonic-gate 4277c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 4287c478bd9Sstevel@tonic-gate 4297c478bd9Sstevel@tonic-gate /* 4307c478bd9Sstevel@tonic-gate * Record the ancestor task's ID for use by extended accounting. 
4317c478bd9Sstevel@tonic-gate */ 4327c478bd9Sstevel@tonic-gate tu->tu_anctaskid = ancestor_tk->tk_tkid; 4337c478bd9Sstevel@tonic-gate task_rele(ancestor_tk); 4347c478bd9Sstevel@tonic-gate 4357c478bd9Sstevel@tonic-gate /* 4367c478bd9Sstevel@tonic-gate * Put new task structure in the hash table. 4377c478bd9Sstevel@tonic-gate */ 4387c478bd9Sstevel@tonic-gate (void) mod_hash_reserve(task_hash, &hndl); 4397c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 4407c478bd9Sstevel@tonic-gate ASSERT(task_find(tkid, getzoneid()) == NULL); 4417c478bd9Sstevel@tonic-gate if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid, 4427c478bd9Sstevel@tonic-gate (mod_hash_val_t *)tk, hndl) != 0) { 4437c478bd9Sstevel@tonic-gate mod_hash_cancel(task_hash, &hndl); 4447c478bd9Sstevel@tonic-gate panic("unable to insert task %d(%p)", tkid, (void *)tk); 4457c478bd9Sstevel@tonic-gate } 4467c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 4477c478bd9Sstevel@tonic-gate 4487c478bd9Sstevel@tonic-gate return (tk); 4497c478bd9Sstevel@tonic-gate } 4507c478bd9Sstevel@tonic-gate 4517c478bd9Sstevel@tonic-gate /* 4527c478bd9Sstevel@tonic-gate * void task_attach(task_t *, proc_t *) 4537c478bd9Sstevel@tonic-gate * 4547c478bd9Sstevel@tonic-gate * Overview 4557c478bd9Sstevel@tonic-gate * task_attach() is used to attach a process to a task; this operation is only 4567c478bd9Sstevel@tonic-gate * performed as a result of a fork() or settaskid() system call. The proc_t's 4577c478bd9Sstevel@tonic-gate * p_tasknext and p_taskprev fields will be set such that the proc_t is a 4587c478bd9Sstevel@tonic-gate * member of the doubly-linked list of proc_t's that make up the task. 4597c478bd9Sstevel@tonic-gate * 4607c478bd9Sstevel@tonic-gate * Return values 4617c478bd9Sstevel@tonic-gate * None. 4627c478bd9Sstevel@tonic-gate * 4637c478bd9Sstevel@tonic-gate * Caller's context 4647c478bd9Sstevel@tonic-gate * pidlock and p->p_lock must be held on entry. 
4657c478bd9Sstevel@tonic-gate */ 4667c478bd9Sstevel@tonic-gate void 4677c478bd9Sstevel@tonic-gate task_attach(task_t *tk, proc_t *p) 4687c478bd9Sstevel@tonic-gate { 4697c478bd9Sstevel@tonic-gate proc_t *first, *prev; 4707c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 4717c478bd9Sstevel@tonic-gate ASSERT(tk != NULL); 4727c478bd9Sstevel@tonic-gate ASSERT(p != NULL); 4737c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 4747c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == NULL) { 4777c478bd9Sstevel@tonic-gate p->p_tasknext = p; 4787c478bd9Sstevel@tonic-gate p->p_taskprev = p; 4797c478bd9Sstevel@tonic-gate } else { 4807c478bd9Sstevel@tonic-gate first = tk->tk_memb_list; 4817c478bd9Sstevel@tonic-gate prev = first->p_taskprev; 4827c478bd9Sstevel@tonic-gate first->p_taskprev = p; 4837c478bd9Sstevel@tonic-gate p->p_tasknext = first; 4847c478bd9Sstevel@tonic-gate p->p_taskprev = prev; 4857c478bd9Sstevel@tonic-gate prev->p_tasknext = p; 4867c478bd9Sstevel@tonic-gate } 4877c478bd9Sstevel@tonic-gate tk->tk_memb_list = p; 4887c478bd9Sstevel@tonic-gate task_hold(tk); 4897c478bd9Sstevel@tonic-gate p->p_task = tk; 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate /* 4927c478bd9Sstevel@tonic-gate * Now that the linkage from process to task and project is 4937c478bd9Sstevel@tonic-gate * complete, do the required callbacks for the task and project 4947c478bd9Sstevel@tonic-gate * rctl sets. 
4957c478bd9Sstevel@tonic-gate */ 4967c478bd9Sstevel@tonic-gate e.rcep_p.proj = tk->tk_proj; 4977c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_PROJECT; 4987c478bd9Sstevel@tonic-gate (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL, 4997c478bd9Sstevel@tonic-gate RCD_CALLBACK); 5007c478bd9Sstevel@tonic-gate 5017c478bd9Sstevel@tonic-gate e.rcep_p.task = tk; 5027c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 5037c478bd9Sstevel@tonic-gate (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL, 5047c478bd9Sstevel@tonic-gate RCD_CALLBACK); 5057c478bd9Sstevel@tonic-gate 5067c478bd9Sstevel@tonic-gate } 5077c478bd9Sstevel@tonic-gate 5087c478bd9Sstevel@tonic-gate /* 5097c478bd9Sstevel@tonic-gate * task_begin() 5107c478bd9Sstevel@tonic-gate * 5117c478bd9Sstevel@tonic-gate * Overview 5127c478bd9Sstevel@tonic-gate * A process constructing a new task calls task_begin() to initialize the 5137c478bd9Sstevel@tonic-gate * task, by attaching itself as a member. 5147c478bd9Sstevel@tonic-gate * 5157c478bd9Sstevel@tonic-gate * Return values 5167c478bd9Sstevel@tonic-gate * None. 5177c478bd9Sstevel@tonic-gate * 5187c478bd9Sstevel@tonic-gate * Caller's context 5197c478bd9Sstevel@tonic-gate * pidlock and p_lock must be held across the call to task_begin(). 
5207c478bd9Sstevel@tonic-gate */ 5217c478bd9Sstevel@tonic-gate void 5227c478bd9Sstevel@tonic-gate task_begin(task_t *tk, proc_t *p) 5237c478bd9Sstevel@tonic-gate { 5247c478bd9Sstevel@tonic-gate timestruc_t ts; 5257c478bd9Sstevel@tonic-gate task_usage_t *tu; 5267c478bd9Sstevel@tonic-gate 5277c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 5287c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 5297c478bd9Sstevel@tonic-gate 5307c478bd9Sstevel@tonic-gate mutex_enter(&tk->tk_usage_lock); 5317c478bd9Sstevel@tonic-gate tu = tk->tk_usage; 5327c478bd9Sstevel@tonic-gate gethrestime(&ts); 5337c478bd9Sstevel@tonic-gate tu->tu_startsec = (uint64_t)ts.tv_sec; 5347c478bd9Sstevel@tonic-gate tu->tu_startnsec = (uint64_t)ts.tv_nsec; 5357c478bd9Sstevel@tonic-gate mutex_exit(&tk->tk_usage_lock); 5367c478bd9Sstevel@tonic-gate 5377c478bd9Sstevel@tonic-gate /* 5387c478bd9Sstevel@tonic-gate * Join process to the task as a member. 5397c478bd9Sstevel@tonic-gate */ 5407c478bd9Sstevel@tonic-gate task_attach(tk, p); 5417c478bd9Sstevel@tonic-gate } 5427c478bd9Sstevel@tonic-gate 5437c478bd9Sstevel@tonic-gate /* 5447c478bd9Sstevel@tonic-gate * void task_detach(proc_t *) 5457c478bd9Sstevel@tonic-gate * 5467c478bd9Sstevel@tonic-gate * Overview 5477c478bd9Sstevel@tonic-gate * task_detach() removes the specified process from its task. task_detach 5487c478bd9Sstevel@tonic-gate * sets the process's task membership to NULL, in anticipation of a final exit 5497c478bd9Sstevel@tonic-gate * or of joining a new task. Because task_rele() requires a context safe for 5507c478bd9Sstevel@tonic-gate * KM_SLEEP allocations, a task_detach() is followed by a subsequent 5517c478bd9Sstevel@tonic-gate * task_rele() once appropriate context is available. 
5527c478bd9Sstevel@tonic-gate * 5537c478bd9Sstevel@tonic-gate * Because task_detach() involves relinquishing the process's membership in 5547c478bd9Sstevel@tonic-gate * the project, any observational rctls the process may have had on the task 5557c478bd9Sstevel@tonic-gate * or project are destroyed. 5567c478bd9Sstevel@tonic-gate * 5577c478bd9Sstevel@tonic-gate * Return values 5587c478bd9Sstevel@tonic-gate * None. 5597c478bd9Sstevel@tonic-gate * 5607c478bd9Sstevel@tonic-gate * Caller's context 5617c478bd9Sstevel@tonic-gate * pidlock and p_lock held across task_detach(). 5627c478bd9Sstevel@tonic-gate */ 5637c478bd9Sstevel@tonic-gate void 5647c478bd9Sstevel@tonic-gate task_detach(proc_t *p) 5657c478bd9Sstevel@tonic-gate { 5667c478bd9Sstevel@tonic-gate task_t *tk = p->p_task; 5677c478bd9Sstevel@tonic-gate 5687c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 5697c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 5707c478bd9Sstevel@tonic-gate ASSERT(p->p_task != NULL); 5717c478bd9Sstevel@tonic-gate ASSERT(tk->tk_memb_list != NULL); 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == p) 5747c478bd9Sstevel@tonic-gate tk->tk_memb_list = p->p_tasknext; 5757c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == p) 5767c478bd9Sstevel@tonic-gate tk->tk_memb_list = NULL; 5777c478bd9Sstevel@tonic-gate p->p_taskprev->p_tasknext = p->p_tasknext; 5787c478bd9Sstevel@tonic-gate p->p_tasknext->p_taskprev = p->p_taskprev; 5797c478bd9Sstevel@tonic-gate 5807c478bd9Sstevel@tonic-gate rctl_set_tearoff(p->p_task->tk_rctls, p); 5817c478bd9Sstevel@tonic-gate rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p); 5827c478bd9Sstevel@tonic-gate 5837c478bd9Sstevel@tonic-gate p->p_task = NULL; 5847c478bd9Sstevel@tonic-gate p->p_tasknext = p->p_taskprev = NULL; 5857c478bd9Sstevel@tonic-gate } 5867c478bd9Sstevel@tonic-gate 5877c478bd9Sstevel@tonic-gate /* 5887c478bd9Sstevel@tonic-gate * task_change(task_t *, proc_t *) 5897c478bd9Sstevel@tonic-gate * 
5907c478bd9Sstevel@tonic-gate * Overview 5917c478bd9Sstevel@tonic-gate * task_change() removes the specified process from its current task. The 5927c478bd9Sstevel@tonic-gate * process is then attached to the specified task. This routine is called 5937c478bd9Sstevel@tonic-gate * from settaskid() when process is being moved to a new task. 5947c478bd9Sstevel@tonic-gate * 5957c478bd9Sstevel@tonic-gate * Return values 5967c478bd9Sstevel@tonic-gate * None. 5977c478bd9Sstevel@tonic-gate * 5987c478bd9Sstevel@tonic-gate * Caller's context 5997c478bd9Sstevel@tonic-gate * pidlock and p_lock held across task_change() 6007c478bd9Sstevel@tonic-gate */ 6017c478bd9Sstevel@tonic-gate void 6027c478bd9Sstevel@tonic-gate task_change(task_t *newtk, proc_t *p) 6037c478bd9Sstevel@tonic-gate { 6047c478bd9Sstevel@tonic-gate task_t *oldtk = p->p_task; 6057c478bd9Sstevel@tonic-gate 6067c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 6077c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 6087c478bd9Sstevel@tonic-gate ASSERT(oldtk != NULL); 6097c478bd9Sstevel@tonic-gate ASSERT(oldtk->tk_memb_list != NULL); 6107c478bd9Sstevel@tonic-gate 6117c478bd9Sstevel@tonic-gate mutex_enter(&p->p_zone->zone_nlwps_lock); 6127c478bd9Sstevel@tonic-gate oldtk->tk_nlwps -= p->p_lwpcnt; 6137c478bd9Sstevel@tonic-gate mutex_exit(&p->p_zone->zone_nlwps_lock); 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate mutex_enter(&newtk->tk_zone->zone_nlwps_lock); 6167c478bd9Sstevel@tonic-gate newtk->tk_nlwps += p->p_lwpcnt; 6177c478bd9Sstevel@tonic-gate mutex_exit(&newtk->tk_zone->zone_nlwps_lock); 6187c478bd9Sstevel@tonic-gate 6197c478bd9Sstevel@tonic-gate task_detach(p); 6207c478bd9Sstevel@tonic-gate task_begin(newtk, p); 6217c478bd9Sstevel@tonic-gate } 6227c478bd9Sstevel@tonic-gate 6237c478bd9Sstevel@tonic-gate /* 6247c478bd9Sstevel@tonic-gate * task_end() 6257c478bd9Sstevel@tonic-gate * 6267c478bd9Sstevel@tonic-gate * Overview 6277c478bd9Sstevel@tonic-gate * task_end() contains the actions 
executed once the final member of 6287c478bd9Sstevel@tonic-gate * a task has released the task, and all actions connected with the task, such 6297c478bd9Sstevel@tonic-gate * as committing an accounting record to a file, are completed. It is called 6307c478bd9Sstevel@tonic-gate * by the known last consumer of the task information. Additionally, 6317c478bd9Sstevel@tonic-gate * task_end() must never refer to any process in the system. 6327c478bd9Sstevel@tonic-gate * 6337c478bd9Sstevel@tonic-gate * Return values 6347c478bd9Sstevel@tonic-gate * None. 6357c478bd9Sstevel@tonic-gate * 6367c478bd9Sstevel@tonic-gate * Caller's context 6377c478bd9Sstevel@tonic-gate * No restrictions on context, beyond that given above. 6387c478bd9Sstevel@tonic-gate */ 6397c478bd9Sstevel@tonic-gate void 6407c478bd9Sstevel@tonic-gate task_end(task_t *tk) 6417c478bd9Sstevel@tonic-gate { 6427c478bd9Sstevel@tonic-gate ASSERT(tk->tk_hold_count == 0); 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate project_rele(tk->tk_proj); 6457c478bd9Sstevel@tonic-gate kmem_free(tk->tk_usage, sizeof (task_usage_t)); 6467c478bd9Sstevel@tonic-gate if (tk->tk_prevusage != NULL) 6477c478bd9Sstevel@tonic-gate kmem_free(tk->tk_prevusage, sizeof (task_usage_t)); 6487c478bd9Sstevel@tonic-gate if (tk->tk_zoneusage != NULL) 6497c478bd9Sstevel@tonic-gate kmem_free(tk->tk_zoneusage, sizeof (task_usage_t)); 6507c478bd9Sstevel@tonic-gate rctl_set_free(tk->tk_rctls); 6517c478bd9Sstevel@tonic-gate id_free(taskid_space, tk->tk_tkid); 6527c478bd9Sstevel@tonic-gate zone_task_rele(tk->tk_zone); 6537c478bd9Sstevel@tonic-gate kmem_cache_free(task_cache, tk); 6547c478bd9Sstevel@tonic-gate } 6557c478bd9Sstevel@tonic-gate 6567c478bd9Sstevel@tonic-gate static void 6577c478bd9Sstevel@tonic-gate changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf, 6587c478bd9Sstevel@tonic-gate void *zonebuf) 6597c478bd9Sstevel@tonic-gate { 6607c478bd9Sstevel@tonic-gate kproject_t *oldkpj; 6617c478bd9Sstevel@tonic-gate kthread_t *t; 
6627c478bd9Sstevel@tonic-gate 6637c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 6647c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 6657c478bd9Sstevel@tonic-gate 6667c478bd9Sstevel@tonic-gate if ((t = p->p_tlist) != NULL) { 6677c478bd9Sstevel@tonic-gate do { 6687c478bd9Sstevel@tonic-gate (void) project_hold(kpj); 6697c478bd9Sstevel@tonic-gate 6707c478bd9Sstevel@tonic-gate thread_lock(t); 6717c478bd9Sstevel@tonic-gate oldkpj = ttoproj(t); 672*c97ad5cdSakolb 673*c97ad5cdSakolb /* 674*c97ad5cdSakolb * Kick this thread so that he doesn't sit 675*c97ad5cdSakolb * on a wrong wait queue. 676*c97ad5cdSakolb */ 677*c97ad5cdSakolb if (ISWAITING(t)) 678*c97ad5cdSakolb setrun_locked(t); 679*c97ad5cdSakolb 680*c97ad5cdSakolb /* 681*c97ad5cdSakolb * The thread wants to go on the project wait queue, but 682*c97ad5cdSakolb * the waitq is changing. 683*c97ad5cdSakolb */ 684*c97ad5cdSakolb if (t->t_schedflag & TS_PROJWAITQ) 685*c97ad5cdSakolb t->t_schedflag &= ~ TS_PROJWAITQ; 686*c97ad5cdSakolb 6877c478bd9Sstevel@tonic-gate t->t_proj = kpj; 6887c478bd9Sstevel@tonic-gate t->t_pre_sys = 1; /* For cred update */ 6897c478bd9Sstevel@tonic-gate thread_unlock(t); 6907c478bd9Sstevel@tonic-gate fss_changeproj(t, kpj, zone, projbuf, zonebuf); 6917c478bd9Sstevel@tonic-gate 6927c478bd9Sstevel@tonic-gate project_rele(oldkpj); 6937c478bd9Sstevel@tonic-gate } while ((t = t->t_forw) != p->p_tlist); 6947c478bd9Sstevel@tonic-gate } 6957c478bd9Sstevel@tonic-gate } 6967c478bd9Sstevel@tonic-gate 6977c478bd9Sstevel@tonic-gate /* 6987c478bd9Sstevel@tonic-gate * task_join() 6997c478bd9Sstevel@tonic-gate * 7007c478bd9Sstevel@tonic-gate * Overview 7017c478bd9Sstevel@tonic-gate * task_join() contains the actions that must be executed when the first 7027c478bd9Sstevel@tonic-gate * member (curproc) of a newly created task joins it. It may never fail. 
7037c478bd9Sstevel@tonic-gate * 7047c478bd9Sstevel@tonic-gate * The caller must make sure holdlwps() is called so that all other lwps are 7057c478bd9Sstevel@tonic-gate * stopped prior to calling this function. 7067c478bd9Sstevel@tonic-gate * 7077c478bd9Sstevel@tonic-gate * NB: It returns with curproc->p_lock held. 7087c478bd9Sstevel@tonic-gate * 7097c478bd9Sstevel@tonic-gate * Return values 7107c478bd9Sstevel@tonic-gate * Pointer to the old task. 7117c478bd9Sstevel@tonic-gate * 7127c478bd9Sstevel@tonic-gate * Caller's context 7137c478bd9Sstevel@tonic-gate * cpu_lock must be held entering the function. It will acquire pidlock, 7147c478bd9Sstevel@tonic-gate * p_crlock and p_lock during execution. 7157c478bd9Sstevel@tonic-gate */ 7167c478bd9Sstevel@tonic-gate task_t * 7177c478bd9Sstevel@tonic-gate task_join(task_t *tk, uint_t flags) 7187c478bd9Sstevel@tonic-gate { 7197c478bd9Sstevel@tonic-gate proc_t *p = ttoproc(curthread); 7207c478bd9Sstevel@tonic-gate task_t *prev_tk; 7217c478bd9Sstevel@tonic-gate void *projbuf, *zonebuf; 7227c478bd9Sstevel@tonic-gate zone_t *zone = tk->tk_zone; 7237c478bd9Sstevel@tonic-gate projid_t projid = tk->tk_proj->kpj_id; 7247c478bd9Sstevel@tonic-gate cred_t *oldcr; 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate /* 7277c478bd9Sstevel@tonic-gate * We can't know for sure if holdlwps() was called, but we can check to 7287c478bd9Sstevel@tonic-gate * ensure we're single-threaded. 7297c478bd9Sstevel@tonic-gate */ 7307c478bd9Sstevel@tonic-gate ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1); 7317c478bd9Sstevel@tonic-gate 7327c478bd9Sstevel@tonic-gate /* 7337c478bd9Sstevel@tonic-gate * Changing the credential is always hard because we cannot 7347c478bd9Sstevel@tonic-gate * allocate memory when holding locks but we don't know whether 7357c478bd9Sstevel@tonic-gate * we need to change it. We first get a reference to the current 7367c478bd9Sstevel@tonic-gate * cred if we need to change it. 
Then we create a credential 7377c478bd9Sstevel@tonic-gate * with an updated project id. Finally we install it, first 7387c478bd9Sstevel@tonic-gate * releasing the reference we had on the p_cred at the time we 7397c478bd9Sstevel@tonic-gate * acquired the lock the first time and later we release the 7407c478bd9Sstevel@tonic-gate * reference to p_cred at the time we acquired the lock the 7417c478bd9Sstevel@tonic-gate * second time. 7427c478bd9Sstevel@tonic-gate */ 7437c478bd9Sstevel@tonic-gate mutex_enter(&p->p_crlock); 7447c478bd9Sstevel@tonic-gate if (crgetprojid(p->p_cred) == projid) 7457c478bd9Sstevel@tonic-gate oldcr = NULL; 7467c478bd9Sstevel@tonic-gate else 7477c478bd9Sstevel@tonic-gate crhold(oldcr = p->p_cred); 7487c478bd9Sstevel@tonic-gate mutex_exit(&p->p_crlock); 7497c478bd9Sstevel@tonic-gate 7507c478bd9Sstevel@tonic-gate if (oldcr != NULL) { 7517c478bd9Sstevel@tonic-gate cred_t *newcr = crdup(oldcr); 7527c478bd9Sstevel@tonic-gate crsetprojid(newcr, projid); 7537c478bd9Sstevel@tonic-gate crfree(oldcr); 7547c478bd9Sstevel@tonic-gate 7557c478bd9Sstevel@tonic-gate mutex_enter(&p->p_crlock); 7567c478bd9Sstevel@tonic-gate oldcr = p->p_cred; 7577c478bd9Sstevel@tonic-gate p->p_cred = newcr; 7587c478bd9Sstevel@tonic-gate mutex_exit(&p->p_crlock); 7597c478bd9Sstevel@tonic-gate crfree(oldcr); 7607c478bd9Sstevel@tonic-gate } 7617c478bd9Sstevel@tonic-gate 7627c478bd9Sstevel@tonic-gate /* 7637c478bd9Sstevel@tonic-gate * Make sure that the number of processor sets is constant 7647c478bd9Sstevel@tonic-gate * across this operation. 
7657c478bd9Sstevel@tonic-gate */ 7667c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); 7677c478bd9Sstevel@tonic-gate 7687c478bd9Sstevel@tonic-gate projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ); 7697c478bd9Sstevel@tonic-gate zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE); 7707c478bd9Sstevel@tonic-gate 7717c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 7727c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 7737c478bd9Sstevel@tonic-gate 7747c478bd9Sstevel@tonic-gate prev_tk = p->p_task; 7757c478bd9Sstevel@tonic-gate task_change(tk, p); 7767c478bd9Sstevel@tonic-gate 7777c478bd9Sstevel@tonic-gate /* 7787c478bd9Sstevel@tonic-gate * Now move threads one by one to their new project. 7797c478bd9Sstevel@tonic-gate */ 7807c478bd9Sstevel@tonic-gate changeproj(p, tk->tk_proj, zone, projbuf, zonebuf); 7817c478bd9Sstevel@tonic-gate if (flags & TASK_FINAL) 7827c478bd9Sstevel@tonic-gate p->p_task->tk_flags |= TASK_FINAL; 7837c478bd9Sstevel@tonic-gate 7847c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 7857c478bd9Sstevel@tonic-gate 7867c478bd9Sstevel@tonic-gate fss_freebuf(zonebuf, FSS_ALLOC_ZONE); 7877c478bd9Sstevel@tonic-gate fss_freebuf(projbuf, FSS_ALLOC_PROJ); 7887c478bd9Sstevel@tonic-gate return (prev_tk); 7897c478bd9Sstevel@tonic-gate } 7907c478bd9Sstevel@tonic-gate 7917c478bd9Sstevel@tonic-gate /* 7927c478bd9Sstevel@tonic-gate * rctl ops vectors 7937c478bd9Sstevel@tonic-gate */ 7947c478bd9Sstevel@tonic-gate static rctl_ops_t task_lwps_ops = { 7957c478bd9Sstevel@tonic-gate rcop_no_action, 7967c478bd9Sstevel@tonic-gate task_lwps_usage, 7977c478bd9Sstevel@tonic-gate task_lwps_set, 7987c478bd9Sstevel@tonic-gate task_lwps_test 7997c478bd9Sstevel@tonic-gate }; 8007c478bd9Sstevel@tonic-gate 8017c478bd9Sstevel@tonic-gate static rctl_ops_t task_cpu_time_ops = { 8027c478bd9Sstevel@tonic-gate rcop_no_action, 8037c478bd9Sstevel@tonic-gate task_cpu_time_usage, 8047c478bd9Sstevel@tonic-gate rcop_no_set, 8057c478bd9Sstevel@tonic-gate task_cpu_time_test 
8067c478bd9Sstevel@tonic-gate }; 8077c478bd9Sstevel@tonic-gate 8087c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 8097c478bd9Sstevel@tonic-gate /* 8107c478bd9Sstevel@tonic-gate * void task_init(void) 8117c478bd9Sstevel@tonic-gate * 8127c478bd9Sstevel@tonic-gate * Overview 8137c478bd9Sstevel@tonic-gate * task_init() initializes task-related hashes, caches, and the task id 8147c478bd9Sstevel@tonic-gate * space. Additionally, task_init() establishes p0 as a member of task0. 8157c478bd9Sstevel@tonic-gate * Called by main(). 8167c478bd9Sstevel@tonic-gate * 8177c478bd9Sstevel@tonic-gate * Return values 8187c478bd9Sstevel@tonic-gate * None. 8197c478bd9Sstevel@tonic-gate * 8207c478bd9Sstevel@tonic-gate * Caller's context 8217c478bd9Sstevel@tonic-gate * task_init() must be called prior to MP startup. 8227c478bd9Sstevel@tonic-gate */ 8237c478bd9Sstevel@tonic-gate void 8247c478bd9Sstevel@tonic-gate task_init(void) 8257c478bd9Sstevel@tonic-gate { 8267c478bd9Sstevel@tonic-gate proc_t *p = &p0; 8277c478bd9Sstevel@tonic-gate mod_hash_hndl_t hndl; 8287c478bd9Sstevel@tonic-gate rctl_set_t *set; 8297c478bd9Sstevel@tonic-gate rctl_alloc_gp_t *gp; 8307c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 8317c478bd9Sstevel@tonic-gate /* 8327c478bd9Sstevel@tonic-gate * Initialize task_cache and taskid_space. 8337c478bd9Sstevel@tonic-gate */ 8347c478bd9Sstevel@tonic-gate task_cache = kmem_cache_create("task_cache", sizeof (task_t), 8357c478bd9Sstevel@tonic-gate 0, NULL, NULL, NULL, NULL, NULL, 0); 8367c478bd9Sstevel@tonic-gate taskid_space = id_space_create("taskid_space", 0, MAX_TASKID); 8377c478bd9Sstevel@tonic-gate 8387c478bd9Sstevel@tonic-gate /* 8397c478bd9Sstevel@tonic-gate * Initialize task hash table. 
8407c478bd9Sstevel@tonic-gate */ 8417c478bd9Sstevel@tonic-gate task_hash = mod_hash_create_idhash("task_hash", task_hash_size, 8427c478bd9Sstevel@tonic-gate mod_hash_null_valdtor); 8437c478bd9Sstevel@tonic-gate 8447c478bd9Sstevel@tonic-gate /* 8457c478bd9Sstevel@tonic-gate * Initialize task-based rctls. 8467c478bd9Sstevel@tonic-gate */ 8477c478bd9Sstevel@tonic-gate rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK, 8487c478bd9Sstevel@tonic-gate RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX, 8497c478bd9Sstevel@tonic-gate &task_lwps_ops); 8507c478bd9Sstevel@tonic-gate rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK, 8517c478bd9Sstevel@tonic-gate RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER | 8527c478bd9Sstevel@tonic-gate RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE | 8537c478bd9Sstevel@tonic-gate RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX, 8547c478bd9Sstevel@tonic-gate UINT64_MAX, &task_cpu_time_ops); 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate /* 8577c478bd9Sstevel@tonic-gate * Create task0 and place p0 in it as a member. 
8587c478bd9Sstevel@tonic-gate */ 8597c478bd9Sstevel@tonic-gate task0p = kmem_cache_alloc(task_cache, KM_SLEEP); 8607c478bd9Sstevel@tonic-gate bzero(task0p, sizeof (task_t)); 8617c478bd9Sstevel@tonic-gate 8627c478bd9Sstevel@tonic-gate task0p->tk_tkid = id_alloc(taskid_space); 8637c478bd9Sstevel@tonic-gate task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 8640209230bSgjelinek task0p->tk_proj = project_hold_by_id(0, &zone0, 8657c478bd9Sstevel@tonic-gate PROJECT_HOLD_INSERT); 8667c478bd9Sstevel@tonic-gate task0p->tk_flags = TASK_NORMAL; 8677c478bd9Sstevel@tonic-gate task0p->tk_nlwps = p->p_lwpcnt; 8687c478bd9Sstevel@tonic-gate task0p->tk_zone = global_zone; 8697c478bd9Sstevel@tonic-gate 8707c478bd9Sstevel@tonic-gate set = rctl_set_create(); 8717c478bd9Sstevel@tonic-gate gp = rctl_set_init_prealloc(RCENTITY_TASK); 8727c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 8737c478bd9Sstevel@tonic-gate e.rcep_p.task = task0p; 8747c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 8757c478bd9Sstevel@tonic-gate task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp); 8767c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 8777c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 8787c478bd9Sstevel@tonic-gate 8797c478bd9Sstevel@tonic-gate (void) mod_hash_reserve(task_hash, &hndl); 8807c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 8817c478bd9Sstevel@tonic-gate ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL); 8827c478bd9Sstevel@tonic-gate if (mod_hash_insert_reserve(task_hash, 8837c478bd9Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)task0p->tk_tkid, 8847c478bd9Sstevel@tonic-gate (mod_hash_val_t *)task0p, hndl) != 0) { 8857c478bd9Sstevel@tonic-gate mod_hash_cancel(task_hash, &hndl); 8867c478bd9Sstevel@tonic-gate panic("unable to insert task %d(%p)", task0p->tk_tkid, 8877c478bd9Sstevel@tonic-gate (void *)task0p); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 
8907c478bd9Sstevel@tonic-gate 8917c478bd9Sstevel@tonic-gate task0p->tk_memb_list = p; 8927c478bd9Sstevel@tonic-gate 8937c478bd9Sstevel@tonic-gate /* 8947c478bd9Sstevel@tonic-gate * Initialize task pointers for p0, including doubly linked list of task 8957c478bd9Sstevel@tonic-gate * members. 8967c478bd9Sstevel@tonic-gate */ 8977c478bd9Sstevel@tonic-gate p->p_task = task0p; 8987c478bd9Sstevel@tonic-gate p->p_taskprev = p->p_tasknext = p; 8997c478bd9Sstevel@tonic-gate task_hold(task0p); 9007c478bd9Sstevel@tonic-gate } 901