/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/exacct.h>
#include <sys/id_space.h>
#include <sys/kmem.h>
#include <sys/modhash.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/rctl.h>
#include <sys/systm.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/cpuvar.h>
#include <sys/fss.h>
#include <sys/class.h>
#include <sys/project.h>

/*
 * Tasks
 *
 * A task is a collection of processes, associated with a common project ID
 * and related by a common initial parent.  The task primarily represents a
 * natural process sequence with known resource usage, although it can also be
 * viewed as a convenient grouping of processes for signal delivery, processor
 * binding, and administrative operations.
 *
 * Membership and observership
 *   We can conceive of situations where processes outside of the task may wish
 *   to examine the resource usage of the task.  Similarly, a number of the
 *   administrative operations on a task can be performed by processes who are
 *   not members of the task.  Accordingly, we must design a locking strategy
 *   where observers of the task, who wish to examine or operate on the task,
 *   and members of the task, who can perform the mentioned operations, as well
 *   as leave the task, see a consistent and correct representation of the task
 *   at all times.
 *
 * Locking
 *   Because the task membership is a new relation between processes, its
 *   locking becomes an additional responsibility of the pidlock/p_lock locking
 *   sequence; however, tasks closely resemble sessions and the session locking
 *   model is mostly appropriate for the interaction of tasks, processes, and
 *   procfs.
 *
 *   kmutex_t task_hash_lock
 *     task_hash_lock is a global lock protecting the contents of the task
 *     ID-to-task pointer hash.  Holders of task_hash_lock must not attempt to
 *     acquire pidlock or p_lock.
 *   uint_t tk_hold_count
 *     tk_hold_count, the number of members and observers of the current task,
 *     must be manipulated atomically.
 *   proc_t *tk_memb_list
 *   proc_t *p_tasknext
 *   proc_t *p_taskprev
 *     The task's membership list is protected by pidlock, and is therefore
 *     always acquired before any of its members' p_lock mutexes.  The p_task
 *     member of the proc structure is protected by pidlock or p_lock for
 *     reading, and by both pidlock and p_lock for modification, as is done for
 *     p_sessp.  The key point is that only the process can modify its p_task,
 *     and not any other entity on the system.  (/proc will use prlock() to
 *     prevent the process from leaving, as opposed to pidlock.)
 *   kmutex_t tk_usage_lock
 *     tk_usage_lock is a per-task lock protecting the contents of the task
 *     usage structure and tk_nlwps counter for the task.max-lwps resource
 *     control.
 *     NOTE(review): the routines in this file read and update tk_nlwps while
 *     holding the owning zone's zone_nlwps_lock, not tk_usage_lock; confirm
 *     which lock is authoritative for that counter.
96*7c478bd9Sstevel@tonic-gate */ 97*7c478bd9Sstevel@tonic-gate 98*7c478bd9Sstevel@tonic-gate int task_hash_size = 256; 99*7c478bd9Sstevel@tonic-gate static kmutex_t task_hash_lock; 100*7c478bd9Sstevel@tonic-gate static mod_hash_t *task_hash; 101*7c478bd9Sstevel@tonic-gate 102*7c478bd9Sstevel@tonic-gate static id_space_t *taskid_space; /* global taskid space */ 103*7c478bd9Sstevel@tonic-gate static kmem_cache_t *task_cache; /* kmem cache for task structures */ 104*7c478bd9Sstevel@tonic-gate 105*7c478bd9Sstevel@tonic-gate rctl_hndl_t rc_task_lwps; 106*7c478bd9Sstevel@tonic-gate rctl_hndl_t rc_task_cpu_time; 107*7c478bd9Sstevel@tonic-gate 108*7c478bd9Sstevel@tonic-gate /* 109*7c478bd9Sstevel@tonic-gate * static rctl_qty_t task_usage_lwps(void *taskp) 110*7c478bd9Sstevel@tonic-gate * 111*7c478bd9Sstevel@tonic-gate * Overview 112*7c478bd9Sstevel@tonic-gate * task_usage_lwps() is the usage operation for the resource control 113*7c478bd9Sstevel@tonic-gate * associated with the number of LWPs in a task. 114*7c478bd9Sstevel@tonic-gate * 115*7c478bd9Sstevel@tonic-gate * Return values 116*7c478bd9Sstevel@tonic-gate * The number of LWPs in the given task is returned. 117*7c478bd9Sstevel@tonic-gate * 118*7c478bd9Sstevel@tonic-gate * Caller's context 119*7c478bd9Sstevel@tonic-gate * The p->p_lock must be held across the call. 
120*7c478bd9Sstevel@tonic-gate */ 121*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 122*7c478bd9Sstevel@tonic-gate static rctl_qty_t 123*7c478bd9Sstevel@tonic-gate task_lwps_usage(rctl_t *r, proc_t *p) 124*7c478bd9Sstevel@tonic-gate { 125*7c478bd9Sstevel@tonic-gate task_t *t; 126*7c478bd9Sstevel@tonic-gate rctl_qty_t nlwps; 127*7c478bd9Sstevel@tonic-gate 128*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 129*7c478bd9Sstevel@tonic-gate 130*7c478bd9Sstevel@tonic-gate t = p->p_task; 131*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_zone->zone_nlwps_lock); 132*7c478bd9Sstevel@tonic-gate nlwps = t->tk_nlwps; 133*7c478bd9Sstevel@tonic-gate mutex_exit(&p->p_zone->zone_nlwps_lock); 134*7c478bd9Sstevel@tonic-gate 135*7c478bd9Sstevel@tonic-gate return (nlwps); 136*7c478bd9Sstevel@tonic-gate } 137*7c478bd9Sstevel@tonic-gate 138*7c478bd9Sstevel@tonic-gate /* 139*7c478bd9Sstevel@tonic-gate * static int task_test_lwps(void *taskp, rctl_val_t *, int64_t incr, 140*7c478bd9Sstevel@tonic-gate * int flags) 141*7c478bd9Sstevel@tonic-gate * 142*7c478bd9Sstevel@tonic-gate * Overview 143*7c478bd9Sstevel@tonic-gate * task_test_lwps() is the test-if-valid-increment for the resource control 144*7c478bd9Sstevel@tonic-gate * for the number of processes in a task. 145*7c478bd9Sstevel@tonic-gate * 146*7c478bd9Sstevel@tonic-gate * Return values 147*7c478bd9Sstevel@tonic-gate * 0 if the threshold limit was not passed, 1 if the limit was passed. 148*7c478bd9Sstevel@tonic-gate * 149*7c478bd9Sstevel@tonic-gate * Caller's context 150*7c478bd9Sstevel@tonic-gate * p->p_lock must be held across the call. 
151*7c478bd9Sstevel@tonic-gate */ 152*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 153*7c478bd9Sstevel@tonic-gate static int 154*7c478bd9Sstevel@tonic-gate task_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, 155*7c478bd9Sstevel@tonic-gate rctl_qty_t incr, 156*7c478bd9Sstevel@tonic-gate uint_t flags) 157*7c478bd9Sstevel@tonic-gate { 158*7c478bd9Sstevel@tonic-gate rctl_qty_t nlwps; 159*7c478bd9Sstevel@tonic-gate 160*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 161*7c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 162*7c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 163*7c478bd9Sstevel@tonic-gate return (0); 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&(e->rcep_p.task->tk_zone->zone_nlwps_lock))); 166*7c478bd9Sstevel@tonic-gate nlwps = e->rcep_p.task->tk_nlwps; 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate if (nlwps + incr > rcntl->rcv_value) 169*7c478bd9Sstevel@tonic-gate return (1); 170*7c478bd9Sstevel@tonic-gate 171*7c478bd9Sstevel@tonic-gate return (0); 172*7c478bd9Sstevel@tonic-gate } 173*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 174*7c478bd9Sstevel@tonic-gate static int 175*7c478bd9Sstevel@tonic-gate task_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) { 176*7c478bd9Sstevel@tonic-gate 177*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 178*7c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 179*7c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 180*7c478bd9Sstevel@tonic-gate return (0); 181*7c478bd9Sstevel@tonic-gate 182*7c478bd9Sstevel@tonic-gate e->rcep_p.task->tk_nlwps_ctl = nv; 183*7c478bd9Sstevel@tonic-gate return (0); 184*7c478bd9Sstevel@tonic-gate } 185*7c478bd9Sstevel@tonic-gate 186*7c478bd9Sstevel@tonic-gate /* 187*7c478bd9Sstevel@tonic-gate * static rctl_qty_t task_usage_cpu_secs(void *taskp) 188*7c478bd9Sstevel@tonic-gate * 189*7c478bd9Sstevel@tonic-gate * Overview 190*7c478bd9Sstevel@tonic-gate * 
task_usage_cpu_secs() is the usage operation for the resource control 191*7c478bd9Sstevel@tonic-gate * associated with the total accrued CPU seconds for a task. 192*7c478bd9Sstevel@tonic-gate * 193*7c478bd9Sstevel@tonic-gate * Return values 194*7c478bd9Sstevel@tonic-gate * The number of CPU seconds consumed by the task is returned. 195*7c478bd9Sstevel@tonic-gate * 196*7c478bd9Sstevel@tonic-gate * Caller's context 197*7c478bd9Sstevel@tonic-gate * The given task must be held across the call. 198*7c478bd9Sstevel@tonic-gate */ 199*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 200*7c478bd9Sstevel@tonic-gate static rctl_qty_t 201*7c478bd9Sstevel@tonic-gate task_cpu_time_usage(rctl_t *r, proc_t *p) 202*7c478bd9Sstevel@tonic-gate { 203*7c478bd9Sstevel@tonic-gate task_t *t = p->p_task; 204*7c478bd9Sstevel@tonic-gate 205*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 206*7c478bd9Sstevel@tonic-gate return (t->tk_cpu_time / hz); 207*7c478bd9Sstevel@tonic-gate } 208*7c478bd9Sstevel@tonic-gate 209*7c478bd9Sstevel@tonic-gate /* 210*7c478bd9Sstevel@tonic-gate * static int task_test_cpu_secs(void *taskp, rctl_val_t *, int64_t incr, 211*7c478bd9Sstevel@tonic-gate * int flags) 212*7c478bd9Sstevel@tonic-gate * 213*7c478bd9Sstevel@tonic-gate * Overview 214*7c478bd9Sstevel@tonic-gate * task_test_cpu_secs() is the test-if-valid-increment for the resource 215*7c478bd9Sstevel@tonic-gate * control for the total accrued CPU seconds for a task. 216*7c478bd9Sstevel@tonic-gate * 217*7c478bd9Sstevel@tonic-gate * Return values 218*7c478bd9Sstevel@tonic-gate * 0 if the threshold limit was not passed, 1 if the limit was passed. 219*7c478bd9Sstevel@tonic-gate * 220*7c478bd9Sstevel@tonic-gate * Caller's context 221*7c478bd9Sstevel@tonic-gate * The given task must be held across the call. 
222*7c478bd9Sstevel@tonic-gate */ 223*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 224*7c478bd9Sstevel@tonic-gate static int 225*7c478bd9Sstevel@tonic-gate task_cpu_time_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, 226*7c478bd9Sstevel@tonic-gate struct rctl_val *rcntl, rctl_qty_t incr, uint_t flags) 227*7c478bd9Sstevel@tonic-gate { 228*7c478bd9Sstevel@tonic-gate task_t *t; 229*7c478bd9Sstevel@tonic-gate 230*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 231*7c478bd9Sstevel@tonic-gate ASSERT(e->rcep_t == RCENTITY_TASK); 232*7c478bd9Sstevel@tonic-gate if (e->rcep_p.task == NULL) 233*7c478bd9Sstevel@tonic-gate return (0); 234*7c478bd9Sstevel@tonic-gate 235*7c478bd9Sstevel@tonic-gate t = e->rcep_p.task; 236*7c478bd9Sstevel@tonic-gate if ((t->tk_cpu_time + incr) / hz >= rcntl->rcv_value) 237*7c478bd9Sstevel@tonic-gate return (1); 238*7c478bd9Sstevel@tonic-gate 239*7c478bd9Sstevel@tonic-gate return (0); 240*7c478bd9Sstevel@tonic-gate } 241*7c478bd9Sstevel@tonic-gate 242*7c478bd9Sstevel@tonic-gate static task_t * 243*7c478bd9Sstevel@tonic-gate task_find(taskid_t id, zoneid_t zoneid) 244*7c478bd9Sstevel@tonic-gate { 245*7c478bd9Sstevel@tonic-gate task_t *tk; 246*7c478bd9Sstevel@tonic-gate 247*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&task_hash_lock)); 248*7c478bd9Sstevel@tonic-gate 249*7c478bd9Sstevel@tonic-gate if (mod_hash_find(task_hash, (mod_hash_key_t)(uintptr_t)id, 250*7c478bd9Sstevel@tonic-gate (mod_hash_val_t *)&tk) == MH_ERR_NOTFOUND || 251*7c478bd9Sstevel@tonic-gate (zoneid != ALL_ZONES && zoneid != tk->tk_zone->zone_id)) 252*7c478bd9Sstevel@tonic-gate return (NULL); 253*7c478bd9Sstevel@tonic-gate 254*7c478bd9Sstevel@tonic-gate return (tk); 255*7c478bd9Sstevel@tonic-gate } 256*7c478bd9Sstevel@tonic-gate 257*7c478bd9Sstevel@tonic-gate /* 258*7c478bd9Sstevel@tonic-gate * task_hold_by_id(), task_hold_by_id_zone() 259*7c478bd9Sstevel@tonic-gate * 260*7c478bd9Sstevel@tonic-gate * Overview 261*7c478bd9Sstevel@tonic-gate * task_hold_by_id() is used to take 
a reference on a task by its task id, 262*7c478bd9Sstevel@tonic-gate * supporting the various system call interfaces for obtaining resource data, 263*7c478bd9Sstevel@tonic-gate * delivering signals, and so forth. 264*7c478bd9Sstevel@tonic-gate * 265*7c478bd9Sstevel@tonic-gate * Return values 266*7c478bd9Sstevel@tonic-gate * Returns a pointer to the task_t with taskid_t id. The task is returned 267*7c478bd9Sstevel@tonic-gate * with its hold count incremented by one. Returns NULL if there 268*7c478bd9Sstevel@tonic-gate * is no task with the requested id. 269*7c478bd9Sstevel@tonic-gate * 270*7c478bd9Sstevel@tonic-gate * Caller's context 271*7c478bd9Sstevel@tonic-gate * Caller must not be holding task_hash_lock. No restrictions on context. 272*7c478bd9Sstevel@tonic-gate */ 273*7c478bd9Sstevel@tonic-gate task_t * 274*7c478bd9Sstevel@tonic-gate task_hold_by_id_zone(taskid_t id, zoneid_t zoneid) 275*7c478bd9Sstevel@tonic-gate { 276*7c478bd9Sstevel@tonic-gate task_t *tk; 277*7c478bd9Sstevel@tonic-gate 278*7c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 279*7c478bd9Sstevel@tonic-gate if ((tk = task_find(id, zoneid)) != NULL) 280*7c478bd9Sstevel@tonic-gate atomic_add_32(&tk->tk_hold_count, 1); 281*7c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 282*7c478bd9Sstevel@tonic-gate 283*7c478bd9Sstevel@tonic-gate return (tk); 284*7c478bd9Sstevel@tonic-gate } 285*7c478bd9Sstevel@tonic-gate 286*7c478bd9Sstevel@tonic-gate task_t * 287*7c478bd9Sstevel@tonic-gate task_hold_by_id(taskid_t id) 288*7c478bd9Sstevel@tonic-gate { 289*7c478bd9Sstevel@tonic-gate zoneid_t zoneid; 290*7c478bd9Sstevel@tonic-gate 291*7c478bd9Sstevel@tonic-gate if (INGLOBALZONE(curproc)) 292*7c478bd9Sstevel@tonic-gate zoneid = ALL_ZONES; 293*7c478bd9Sstevel@tonic-gate else 294*7c478bd9Sstevel@tonic-gate zoneid = getzoneid(); 295*7c478bd9Sstevel@tonic-gate return (task_hold_by_id_zone(id, zoneid)); 296*7c478bd9Sstevel@tonic-gate } 297*7c478bd9Sstevel@tonic-gate 298*7c478bd9Sstevel@tonic-gate /* 
299*7c478bd9Sstevel@tonic-gate * void task_hold(task_t *) 300*7c478bd9Sstevel@tonic-gate * 301*7c478bd9Sstevel@tonic-gate * Overview 302*7c478bd9Sstevel@tonic-gate * task_hold() is used to take an additional reference to the given task. 303*7c478bd9Sstevel@tonic-gate * 304*7c478bd9Sstevel@tonic-gate * Return values 305*7c478bd9Sstevel@tonic-gate * None. 306*7c478bd9Sstevel@tonic-gate * 307*7c478bd9Sstevel@tonic-gate * Caller's context 308*7c478bd9Sstevel@tonic-gate * No restriction on context. 309*7c478bd9Sstevel@tonic-gate */ 310*7c478bd9Sstevel@tonic-gate void 311*7c478bd9Sstevel@tonic-gate task_hold(task_t *tk) 312*7c478bd9Sstevel@tonic-gate { 313*7c478bd9Sstevel@tonic-gate atomic_add_32(&tk->tk_hold_count, 1); 314*7c478bd9Sstevel@tonic-gate } 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate /* 317*7c478bd9Sstevel@tonic-gate * void task_rele(task_t *) 318*7c478bd9Sstevel@tonic-gate * 319*7c478bd9Sstevel@tonic-gate * Overview 320*7c478bd9Sstevel@tonic-gate * task_rele() relinquishes a reference on the given task, which was acquired 321*7c478bd9Sstevel@tonic-gate * via task_hold() or task_hold_by_id(). If this is the last member or 322*7c478bd9Sstevel@tonic-gate * observer of the task, dispatch it for commitment via the accounting 323*7c478bd9Sstevel@tonic-gate * subsystem. 324*7c478bd9Sstevel@tonic-gate * 325*7c478bd9Sstevel@tonic-gate * Return values 326*7c478bd9Sstevel@tonic-gate * None. 327*7c478bd9Sstevel@tonic-gate * 328*7c478bd9Sstevel@tonic-gate * Caller's context 329*7c478bd9Sstevel@tonic-gate * Caller must not be holding the task_hash_lock. 330*7c478bd9Sstevel@tonic-gate * Caller's context must be acceptable for KM_SLEEP allocations. 
331*7c478bd9Sstevel@tonic-gate */ 332*7c478bd9Sstevel@tonic-gate void 333*7c478bd9Sstevel@tonic-gate task_rele(task_t *tk) 334*7c478bd9Sstevel@tonic-gate { 335*7c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 336*7c478bd9Sstevel@tonic-gate if (atomic_add_32_nv(&tk->tk_hold_count, -1) > 0) { 337*7c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 338*7c478bd9Sstevel@tonic-gate return; 339*7c478bd9Sstevel@tonic-gate } 340*7c478bd9Sstevel@tonic-gate 341*7c478bd9Sstevel@tonic-gate mutex_enter(&tk->tk_zone->zone_nlwps_lock); 342*7c478bd9Sstevel@tonic-gate tk->tk_proj->kpj_ntasks--; 343*7c478bd9Sstevel@tonic-gate mutex_exit(&tk->tk_zone->zone_nlwps_lock); 344*7c478bd9Sstevel@tonic-gate 345*7c478bd9Sstevel@tonic-gate if (mod_hash_destroy(task_hash, 346*7c478bd9Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)tk->tk_tkid) != 0) 347*7c478bd9Sstevel@tonic-gate panic("unable to delete task %d", tk->tk_tkid); 348*7c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 349*7c478bd9Sstevel@tonic-gate 350*7c478bd9Sstevel@tonic-gate /* 351*7c478bd9Sstevel@tonic-gate * At this point, there are no members or observers of the task, so we 352*7c478bd9Sstevel@tonic-gate * can safely send it on for commitment to the accounting subsystem. 353*7c478bd9Sstevel@tonic-gate * The task will be destroyed in task_end() subsequent to commitment. 354*7c478bd9Sstevel@tonic-gate */ 355*7c478bd9Sstevel@tonic-gate (void) taskq_dispatch(exacct_queue, exacct_commit_task, tk, KM_SLEEP); 356*7c478bd9Sstevel@tonic-gate } 357*7c478bd9Sstevel@tonic-gate 358*7c478bd9Sstevel@tonic-gate /* 359*7c478bd9Sstevel@tonic-gate * task_t *task_create(projid_t, zone *) 360*7c478bd9Sstevel@tonic-gate * 361*7c478bd9Sstevel@tonic-gate * Overview 362*7c478bd9Sstevel@tonic-gate * A process constructing a new task calls task_create() to construct and 363*7c478bd9Sstevel@tonic-gate * preinitialize the task for the appropriate destination project. 
Only one 364*7c478bd9Sstevel@tonic-gate * task, the primordial task0, is not created with task_create(). 365*7c478bd9Sstevel@tonic-gate * 366*7c478bd9Sstevel@tonic-gate * Return values 367*7c478bd9Sstevel@tonic-gate * None. 368*7c478bd9Sstevel@tonic-gate * 369*7c478bd9Sstevel@tonic-gate * Caller's context 370*7c478bd9Sstevel@tonic-gate * Caller's context should be safe for KM_SLEEP allocations. 371*7c478bd9Sstevel@tonic-gate * The caller should appropriately bump the kpj_ntasks counter on the 372*7c478bd9Sstevel@tonic-gate * project that contains this task. 373*7c478bd9Sstevel@tonic-gate */ 374*7c478bd9Sstevel@tonic-gate task_t * 375*7c478bd9Sstevel@tonic-gate task_create(projid_t projid, zone_t *zone) 376*7c478bd9Sstevel@tonic-gate { 377*7c478bd9Sstevel@tonic-gate task_t *tk = kmem_cache_alloc(task_cache, KM_SLEEP); 378*7c478bd9Sstevel@tonic-gate task_t *ancestor_tk; 379*7c478bd9Sstevel@tonic-gate taskid_t tkid; 380*7c478bd9Sstevel@tonic-gate task_usage_t *tu = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 381*7c478bd9Sstevel@tonic-gate mod_hash_hndl_t hndl; 382*7c478bd9Sstevel@tonic-gate rctl_set_t *set = rctl_set_create(); 383*7c478bd9Sstevel@tonic-gate rctl_alloc_gp_t *gp; 384*7c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 385*7c478bd9Sstevel@tonic-gate 386*7c478bd9Sstevel@tonic-gate bzero(tk, sizeof (task_t)); 387*7c478bd9Sstevel@tonic-gate 388*7c478bd9Sstevel@tonic-gate tk->tk_tkid = tkid = id_alloc(taskid_space); 389*7c478bd9Sstevel@tonic-gate tk->tk_nlwps = 0; 390*7c478bd9Sstevel@tonic-gate tk->tk_nlwps_ctl = INT_MAX; 391*7c478bd9Sstevel@tonic-gate tk->tk_usage = tu; 392*7c478bd9Sstevel@tonic-gate tk->tk_proj = project_hold_by_id(projid, zone->zone_id, 393*7c478bd9Sstevel@tonic-gate PROJECT_HOLD_INSERT); 394*7c478bd9Sstevel@tonic-gate tk->tk_flags = TASK_NORMAL; 395*7c478bd9Sstevel@tonic-gate 396*7c478bd9Sstevel@tonic-gate /* 397*7c478bd9Sstevel@tonic-gate * Copy ancestor task's resource controls. 
398*7c478bd9Sstevel@tonic-gate */ 399*7c478bd9Sstevel@tonic-gate zone_task_hold(zone); 400*7c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 401*7c478bd9Sstevel@tonic-gate ancestor_tk = curproc->p_task; 402*7c478bd9Sstevel@tonic-gate task_hold(ancestor_tk); 403*7c478bd9Sstevel@tonic-gate tk->tk_zone = zone; 404*7c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 405*7c478bd9Sstevel@tonic-gate 406*7c478bd9Sstevel@tonic-gate for (;;) { 407*7c478bd9Sstevel@tonic-gate gp = rctl_set_dup_prealloc(ancestor_tk->tk_rctls); 408*7c478bd9Sstevel@tonic-gate 409*7c478bd9Sstevel@tonic-gate mutex_enter(&ancestor_tk->tk_rctls->rcs_lock); 410*7c478bd9Sstevel@tonic-gate if (rctl_set_dup_ready(ancestor_tk->tk_rctls, gp)) 411*7c478bd9Sstevel@tonic-gate break; 412*7c478bd9Sstevel@tonic-gate 413*7c478bd9Sstevel@tonic-gate mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 414*7c478bd9Sstevel@tonic-gate 415*7c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 416*7c478bd9Sstevel@tonic-gate } 417*7c478bd9Sstevel@tonic-gate 418*7c478bd9Sstevel@tonic-gate /* 419*7c478bd9Sstevel@tonic-gate * At this point, curproc does not have the appropriate linkage 420*7c478bd9Sstevel@tonic-gate * through the task to the project. So, rctl_set_dup should only 421*7c478bd9Sstevel@tonic-gate * copy the rctls, and leave the callbacks for later. 422*7c478bd9Sstevel@tonic-gate */ 423*7c478bd9Sstevel@tonic-gate e.rcep_p.task = tk; 424*7c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 425*7c478bd9Sstevel@tonic-gate tk->tk_rctls = rctl_set_dup(ancestor_tk->tk_rctls, curproc, curproc, &e, 426*7c478bd9Sstevel@tonic-gate set, gp, RCD_DUP); 427*7c478bd9Sstevel@tonic-gate mutex_exit(&ancestor_tk->tk_rctls->rcs_lock); 428*7c478bd9Sstevel@tonic-gate 429*7c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 430*7c478bd9Sstevel@tonic-gate 431*7c478bd9Sstevel@tonic-gate /* 432*7c478bd9Sstevel@tonic-gate * Record the ancestor task's ID for use by extended accounting. 
433*7c478bd9Sstevel@tonic-gate */ 434*7c478bd9Sstevel@tonic-gate tu->tu_anctaskid = ancestor_tk->tk_tkid; 435*7c478bd9Sstevel@tonic-gate task_rele(ancestor_tk); 436*7c478bd9Sstevel@tonic-gate 437*7c478bd9Sstevel@tonic-gate /* 438*7c478bd9Sstevel@tonic-gate * Put new task structure in the hash table. 439*7c478bd9Sstevel@tonic-gate */ 440*7c478bd9Sstevel@tonic-gate (void) mod_hash_reserve(task_hash, &hndl); 441*7c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 442*7c478bd9Sstevel@tonic-gate ASSERT(task_find(tkid, getzoneid()) == NULL); 443*7c478bd9Sstevel@tonic-gate if (mod_hash_insert_reserve(task_hash, (mod_hash_key_t)(uintptr_t)tkid, 444*7c478bd9Sstevel@tonic-gate (mod_hash_val_t *)tk, hndl) != 0) { 445*7c478bd9Sstevel@tonic-gate mod_hash_cancel(task_hash, &hndl); 446*7c478bd9Sstevel@tonic-gate panic("unable to insert task %d(%p)", tkid, (void *)tk); 447*7c478bd9Sstevel@tonic-gate } 448*7c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 449*7c478bd9Sstevel@tonic-gate 450*7c478bd9Sstevel@tonic-gate return (tk); 451*7c478bd9Sstevel@tonic-gate } 452*7c478bd9Sstevel@tonic-gate 453*7c478bd9Sstevel@tonic-gate /* 454*7c478bd9Sstevel@tonic-gate * void task_attach(task_t *, proc_t *) 455*7c478bd9Sstevel@tonic-gate * 456*7c478bd9Sstevel@tonic-gate * Overview 457*7c478bd9Sstevel@tonic-gate * task_attach() is used to attach a process to a task; this operation is only 458*7c478bd9Sstevel@tonic-gate * performed as a result of a fork() or settaskid() system call. The proc_t's 459*7c478bd9Sstevel@tonic-gate * p_tasknext and p_taskprev fields will be set such that the proc_t is a 460*7c478bd9Sstevel@tonic-gate * member of the doubly-linked list of proc_t's that make up the task. 461*7c478bd9Sstevel@tonic-gate * 462*7c478bd9Sstevel@tonic-gate * Return values 463*7c478bd9Sstevel@tonic-gate * None. 464*7c478bd9Sstevel@tonic-gate * 465*7c478bd9Sstevel@tonic-gate * Caller's context 466*7c478bd9Sstevel@tonic-gate * pidlock and p->p_lock must be held on entry. 
467*7c478bd9Sstevel@tonic-gate */ 468*7c478bd9Sstevel@tonic-gate void 469*7c478bd9Sstevel@tonic-gate task_attach(task_t *tk, proc_t *p) 470*7c478bd9Sstevel@tonic-gate { 471*7c478bd9Sstevel@tonic-gate proc_t *first, *prev; 472*7c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 473*7c478bd9Sstevel@tonic-gate ASSERT(tk != NULL); 474*7c478bd9Sstevel@tonic-gate ASSERT(p != NULL); 475*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 476*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == NULL) { 479*7c478bd9Sstevel@tonic-gate p->p_tasknext = p; 480*7c478bd9Sstevel@tonic-gate p->p_taskprev = p; 481*7c478bd9Sstevel@tonic-gate } else { 482*7c478bd9Sstevel@tonic-gate first = tk->tk_memb_list; 483*7c478bd9Sstevel@tonic-gate prev = first->p_taskprev; 484*7c478bd9Sstevel@tonic-gate first->p_taskprev = p; 485*7c478bd9Sstevel@tonic-gate p->p_tasknext = first; 486*7c478bd9Sstevel@tonic-gate p->p_taskprev = prev; 487*7c478bd9Sstevel@tonic-gate prev->p_tasknext = p; 488*7c478bd9Sstevel@tonic-gate } 489*7c478bd9Sstevel@tonic-gate tk->tk_memb_list = p; 490*7c478bd9Sstevel@tonic-gate task_hold(tk); 491*7c478bd9Sstevel@tonic-gate p->p_task = tk; 492*7c478bd9Sstevel@tonic-gate 493*7c478bd9Sstevel@tonic-gate /* 494*7c478bd9Sstevel@tonic-gate * Now that the linkage from process to task and project is 495*7c478bd9Sstevel@tonic-gate * complete, do the required callbacks for the task and project 496*7c478bd9Sstevel@tonic-gate * rctl sets. 
497*7c478bd9Sstevel@tonic-gate */ 498*7c478bd9Sstevel@tonic-gate e.rcep_p.proj = tk->tk_proj; 499*7c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_PROJECT; 500*7c478bd9Sstevel@tonic-gate (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_proj->kpj_rctls, NULL, 501*7c478bd9Sstevel@tonic-gate RCD_CALLBACK); 502*7c478bd9Sstevel@tonic-gate 503*7c478bd9Sstevel@tonic-gate e.rcep_p.task = tk; 504*7c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 505*7c478bd9Sstevel@tonic-gate (void) rctl_set_dup(NULL, NULL, p, &e, tk->tk_rctls, NULL, 506*7c478bd9Sstevel@tonic-gate RCD_CALLBACK); 507*7c478bd9Sstevel@tonic-gate 508*7c478bd9Sstevel@tonic-gate } 509*7c478bd9Sstevel@tonic-gate 510*7c478bd9Sstevel@tonic-gate /* 511*7c478bd9Sstevel@tonic-gate * task_begin() 512*7c478bd9Sstevel@tonic-gate * 513*7c478bd9Sstevel@tonic-gate * Overview 514*7c478bd9Sstevel@tonic-gate * A process constructing a new task calls task_begin() to initialize the 515*7c478bd9Sstevel@tonic-gate * task, by attaching itself as a member. 516*7c478bd9Sstevel@tonic-gate * 517*7c478bd9Sstevel@tonic-gate * Return values 518*7c478bd9Sstevel@tonic-gate * None. 519*7c478bd9Sstevel@tonic-gate * 520*7c478bd9Sstevel@tonic-gate * Caller's context 521*7c478bd9Sstevel@tonic-gate * pidlock and p_lock must be held across the call to task_begin(). 
522*7c478bd9Sstevel@tonic-gate */ 523*7c478bd9Sstevel@tonic-gate void 524*7c478bd9Sstevel@tonic-gate task_begin(task_t *tk, proc_t *p) 525*7c478bd9Sstevel@tonic-gate { 526*7c478bd9Sstevel@tonic-gate timestruc_t ts; 527*7c478bd9Sstevel@tonic-gate task_usage_t *tu; 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 530*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 531*7c478bd9Sstevel@tonic-gate 532*7c478bd9Sstevel@tonic-gate mutex_enter(&tk->tk_usage_lock); 533*7c478bd9Sstevel@tonic-gate tu = tk->tk_usage; 534*7c478bd9Sstevel@tonic-gate gethrestime(&ts); 535*7c478bd9Sstevel@tonic-gate tu->tu_startsec = (uint64_t)ts.tv_sec; 536*7c478bd9Sstevel@tonic-gate tu->tu_startnsec = (uint64_t)ts.tv_nsec; 537*7c478bd9Sstevel@tonic-gate mutex_exit(&tk->tk_usage_lock); 538*7c478bd9Sstevel@tonic-gate 539*7c478bd9Sstevel@tonic-gate /* 540*7c478bd9Sstevel@tonic-gate * Join process to the task as a member. 541*7c478bd9Sstevel@tonic-gate */ 542*7c478bd9Sstevel@tonic-gate task_attach(tk, p); 543*7c478bd9Sstevel@tonic-gate } 544*7c478bd9Sstevel@tonic-gate 545*7c478bd9Sstevel@tonic-gate /* 546*7c478bd9Sstevel@tonic-gate * void task_detach(proc_t *) 547*7c478bd9Sstevel@tonic-gate * 548*7c478bd9Sstevel@tonic-gate * Overview 549*7c478bd9Sstevel@tonic-gate * task_detach() removes the specified process from its task. task_detach 550*7c478bd9Sstevel@tonic-gate * sets the process's task membership to NULL, in anticipation of a final exit 551*7c478bd9Sstevel@tonic-gate * or of joining a new task. Because task_rele() requires a context safe for 552*7c478bd9Sstevel@tonic-gate * KM_SLEEP allocations, a task_detach() is followed by a subsequent 553*7c478bd9Sstevel@tonic-gate * task_rele() once appropriate context is available. 
554*7c478bd9Sstevel@tonic-gate * 555*7c478bd9Sstevel@tonic-gate * Because task_detach() involves relinquishing the process's membership in 556*7c478bd9Sstevel@tonic-gate * the project, any observational rctls the process may have had on the task 557*7c478bd9Sstevel@tonic-gate * or project are destroyed. 558*7c478bd9Sstevel@tonic-gate * 559*7c478bd9Sstevel@tonic-gate * Return values 560*7c478bd9Sstevel@tonic-gate * None. 561*7c478bd9Sstevel@tonic-gate * 562*7c478bd9Sstevel@tonic-gate * Caller's context 563*7c478bd9Sstevel@tonic-gate * pidlock and p_lock held across task_detach(). 564*7c478bd9Sstevel@tonic-gate */ 565*7c478bd9Sstevel@tonic-gate void 566*7c478bd9Sstevel@tonic-gate task_detach(proc_t *p) 567*7c478bd9Sstevel@tonic-gate { 568*7c478bd9Sstevel@tonic-gate task_t *tk = p->p_task; 569*7c478bd9Sstevel@tonic-gate 570*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 571*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 572*7c478bd9Sstevel@tonic-gate ASSERT(p->p_task != NULL); 573*7c478bd9Sstevel@tonic-gate ASSERT(tk->tk_memb_list != NULL); 574*7c478bd9Sstevel@tonic-gate 575*7c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == p) 576*7c478bd9Sstevel@tonic-gate tk->tk_memb_list = p->p_tasknext; 577*7c478bd9Sstevel@tonic-gate if (tk->tk_memb_list == p) 578*7c478bd9Sstevel@tonic-gate tk->tk_memb_list = NULL; 579*7c478bd9Sstevel@tonic-gate p->p_taskprev->p_tasknext = p->p_tasknext; 580*7c478bd9Sstevel@tonic-gate p->p_tasknext->p_taskprev = p->p_taskprev; 581*7c478bd9Sstevel@tonic-gate 582*7c478bd9Sstevel@tonic-gate rctl_set_tearoff(p->p_task->tk_rctls, p); 583*7c478bd9Sstevel@tonic-gate rctl_set_tearoff(p->p_task->tk_proj->kpj_rctls, p); 584*7c478bd9Sstevel@tonic-gate 585*7c478bd9Sstevel@tonic-gate p->p_task = NULL; 586*7c478bd9Sstevel@tonic-gate p->p_tasknext = p->p_taskprev = NULL; 587*7c478bd9Sstevel@tonic-gate } 588*7c478bd9Sstevel@tonic-gate 589*7c478bd9Sstevel@tonic-gate /* 590*7c478bd9Sstevel@tonic-gate * task_change(task_t *, proc_t *) 
591*7c478bd9Sstevel@tonic-gate * 592*7c478bd9Sstevel@tonic-gate * Overview 593*7c478bd9Sstevel@tonic-gate * task_change() removes the specified process from its current task. The 594*7c478bd9Sstevel@tonic-gate * process is then attached to the specified task. This routine is called 595*7c478bd9Sstevel@tonic-gate * from settaskid() when process is being moved to a new task. 596*7c478bd9Sstevel@tonic-gate * 597*7c478bd9Sstevel@tonic-gate * Return values 598*7c478bd9Sstevel@tonic-gate * None. 599*7c478bd9Sstevel@tonic-gate * 600*7c478bd9Sstevel@tonic-gate * Caller's context 601*7c478bd9Sstevel@tonic-gate * pidlock and p_lock held across task_change() 602*7c478bd9Sstevel@tonic-gate */ 603*7c478bd9Sstevel@tonic-gate void 604*7c478bd9Sstevel@tonic-gate task_change(task_t *newtk, proc_t *p) 605*7c478bd9Sstevel@tonic-gate { 606*7c478bd9Sstevel@tonic-gate task_t *oldtk = p->p_task; 607*7c478bd9Sstevel@tonic-gate 608*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 609*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 610*7c478bd9Sstevel@tonic-gate ASSERT(oldtk != NULL); 611*7c478bd9Sstevel@tonic-gate ASSERT(oldtk->tk_memb_list != NULL); 612*7c478bd9Sstevel@tonic-gate 613*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_zone->zone_nlwps_lock); 614*7c478bd9Sstevel@tonic-gate oldtk->tk_nlwps -= p->p_lwpcnt; 615*7c478bd9Sstevel@tonic-gate mutex_exit(&p->p_zone->zone_nlwps_lock); 616*7c478bd9Sstevel@tonic-gate 617*7c478bd9Sstevel@tonic-gate mutex_enter(&newtk->tk_zone->zone_nlwps_lock); 618*7c478bd9Sstevel@tonic-gate newtk->tk_nlwps += p->p_lwpcnt; 619*7c478bd9Sstevel@tonic-gate mutex_exit(&newtk->tk_zone->zone_nlwps_lock); 620*7c478bd9Sstevel@tonic-gate 621*7c478bd9Sstevel@tonic-gate task_detach(p); 622*7c478bd9Sstevel@tonic-gate task_begin(newtk, p); 623*7c478bd9Sstevel@tonic-gate } 624*7c478bd9Sstevel@tonic-gate 625*7c478bd9Sstevel@tonic-gate /* 626*7c478bd9Sstevel@tonic-gate * task_end() 627*7c478bd9Sstevel@tonic-gate * 628*7c478bd9Sstevel@tonic-gate * Overview 
629*7c478bd9Sstevel@tonic-gate * task_end() contains the actions executed once the final member of 630*7c478bd9Sstevel@tonic-gate * a task has released the task, and all actions connected with the task, such 631*7c478bd9Sstevel@tonic-gate * as committing an accounting record to a file, are completed. It is called 632*7c478bd9Sstevel@tonic-gate * by the known last consumer of the task information. Additionally, 633*7c478bd9Sstevel@tonic-gate * task_end() must never refer to any process in the system. 634*7c478bd9Sstevel@tonic-gate * 635*7c478bd9Sstevel@tonic-gate * Return values 636*7c478bd9Sstevel@tonic-gate * None. 637*7c478bd9Sstevel@tonic-gate * 638*7c478bd9Sstevel@tonic-gate * Caller's context 639*7c478bd9Sstevel@tonic-gate * No restrictions on context, beyond that given above. 640*7c478bd9Sstevel@tonic-gate */ 641*7c478bd9Sstevel@tonic-gate void 642*7c478bd9Sstevel@tonic-gate task_end(task_t *tk) 643*7c478bd9Sstevel@tonic-gate { 644*7c478bd9Sstevel@tonic-gate ASSERT(tk->tk_hold_count == 0); 645*7c478bd9Sstevel@tonic-gate 646*7c478bd9Sstevel@tonic-gate project_rele(tk->tk_proj); 647*7c478bd9Sstevel@tonic-gate kmem_free(tk->tk_usage, sizeof (task_usage_t)); 648*7c478bd9Sstevel@tonic-gate if (tk->tk_prevusage != NULL) 649*7c478bd9Sstevel@tonic-gate kmem_free(tk->tk_prevusage, sizeof (task_usage_t)); 650*7c478bd9Sstevel@tonic-gate if (tk->tk_zoneusage != NULL) 651*7c478bd9Sstevel@tonic-gate kmem_free(tk->tk_zoneusage, sizeof (task_usage_t)); 652*7c478bd9Sstevel@tonic-gate rctl_set_free(tk->tk_rctls); 653*7c478bd9Sstevel@tonic-gate id_free(taskid_space, tk->tk_tkid); 654*7c478bd9Sstevel@tonic-gate zone_task_rele(tk->tk_zone); 655*7c478bd9Sstevel@tonic-gate kmem_cache_free(task_cache, tk); 656*7c478bd9Sstevel@tonic-gate } 657*7c478bd9Sstevel@tonic-gate 658*7c478bd9Sstevel@tonic-gate static void 659*7c478bd9Sstevel@tonic-gate changeproj(proc_t *p, kproject_t *kpj, zone_t *zone, void *projbuf, 660*7c478bd9Sstevel@tonic-gate void *zonebuf) 
661*7c478bd9Sstevel@tonic-gate { 662*7c478bd9Sstevel@tonic-gate kproject_t *oldkpj; 663*7c478bd9Sstevel@tonic-gate kthread_t *t; 664*7c478bd9Sstevel@tonic-gate 665*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock)); 666*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock)); 667*7c478bd9Sstevel@tonic-gate 668*7c478bd9Sstevel@tonic-gate if ((t = p->p_tlist) != NULL) { 669*7c478bd9Sstevel@tonic-gate do { 670*7c478bd9Sstevel@tonic-gate (void) project_hold(kpj); 671*7c478bd9Sstevel@tonic-gate 672*7c478bd9Sstevel@tonic-gate thread_lock(t); 673*7c478bd9Sstevel@tonic-gate oldkpj = ttoproj(t); 674*7c478bd9Sstevel@tonic-gate t->t_proj = kpj; 675*7c478bd9Sstevel@tonic-gate t->t_pre_sys = 1; /* For cred update */ 676*7c478bd9Sstevel@tonic-gate thread_unlock(t); 677*7c478bd9Sstevel@tonic-gate fss_changeproj(t, kpj, zone, projbuf, zonebuf); 678*7c478bd9Sstevel@tonic-gate 679*7c478bd9Sstevel@tonic-gate project_rele(oldkpj); 680*7c478bd9Sstevel@tonic-gate } while ((t = t->t_forw) != p->p_tlist); 681*7c478bd9Sstevel@tonic-gate } 682*7c478bd9Sstevel@tonic-gate } 683*7c478bd9Sstevel@tonic-gate 684*7c478bd9Sstevel@tonic-gate /* 685*7c478bd9Sstevel@tonic-gate * task_join() 686*7c478bd9Sstevel@tonic-gate * 687*7c478bd9Sstevel@tonic-gate * Overview 688*7c478bd9Sstevel@tonic-gate * task_join() contains the actions that must be executed when the first 689*7c478bd9Sstevel@tonic-gate * member (curproc) of a newly created task joins it. It may never fail. 690*7c478bd9Sstevel@tonic-gate * 691*7c478bd9Sstevel@tonic-gate * The caller must make sure holdlwps() is called so that all other lwps are 692*7c478bd9Sstevel@tonic-gate * stopped prior to calling this function. 693*7c478bd9Sstevel@tonic-gate * 694*7c478bd9Sstevel@tonic-gate * NB: It returns with curproc->p_lock held. 695*7c478bd9Sstevel@tonic-gate * 696*7c478bd9Sstevel@tonic-gate * Return values 697*7c478bd9Sstevel@tonic-gate * Pointer to the old task. 
698*7c478bd9Sstevel@tonic-gate * 699*7c478bd9Sstevel@tonic-gate * Caller's context 700*7c478bd9Sstevel@tonic-gate * cpu_lock must be held entering the function. It will acquire pidlock, 701*7c478bd9Sstevel@tonic-gate * p_crlock and p_lock during execution. 702*7c478bd9Sstevel@tonic-gate */ 703*7c478bd9Sstevel@tonic-gate task_t * 704*7c478bd9Sstevel@tonic-gate task_join(task_t *tk, uint_t flags) 705*7c478bd9Sstevel@tonic-gate { 706*7c478bd9Sstevel@tonic-gate proc_t *p = ttoproc(curthread); 707*7c478bd9Sstevel@tonic-gate task_t *prev_tk; 708*7c478bd9Sstevel@tonic-gate void *projbuf, *zonebuf; 709*7c478bd9Sstevel@tonic-gate zone_t *zone = tk->tk_zone; 710*7c478bd9Sstevel@tonic-gate projid_t projid = tk->tk_proj->kpj_id; 711*7c478bd9Sstevel@tonic-gate cred_t *oldcr; 712*7c478bd9Sstevel@tonic-gate 713*7c478bd9Sstevel@tonic-gate /* 714*7c478bd9Sstevel@tonic-gate * We can't know for sure if holdlwps() was called, but we can check to 715*7c478bd9Sstevel@tonic-gate * ensure we're single-threaded. 716*7c478bd9Sstevel@tonic-gate */ 717*7c478bd9Sstevel@tonic-gate ASSERT(curthread == p->p_agenttp || p->p_lwprcnt == 1); 718*7c478bd9Sstevel@tonic-gate 719*7c478bd9Sstevel@tonic-gate /* 720*7c478bd9Sstevel@tonic-gate * Changing the credential is always hard because we cannot 721*7c478bd9Sstevel@tonic-gate * allocate memory when holding locks but we don't know whether 722*7c478bd9Sstevel@tonic-gate * we need to change it. We first get a reference to the current 723*7c478bd9Sstevel@tonic-gate * cred if we need to change it. Then we create a credential 724*7c478bd9Sstevel@tonic-gate * with an updated project id. Finally we install it, first 725*7c478bd9Sstevel@tonic-gate * releasing the reference we had on the p_cred at the time we 726*7c478bd9Sstevel@tonic-gate * acquired the lock the first time and later we release the 727*7c478bd9Sstevel@tonic-gate * reference to p_cred at the time we acquired the lock the 728*7c478bd9Sstevel@tonic-gate * second time. 
729*7c478bd9Sstevel@tonic-gate */ 730*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_crlock); 731*7c478bd9Sstevel@tonic-gate if (crgetprojid(p->p_cred) == projid) 732*7c478bd9Sstevel@tonic-gate oldcr = NULL; 733*7c478bd9Sstevel@tonic-gate else 734*7c478bd9Sstevel@tonic-gate crhold(oldcr = p->p_cred); 735*7c478bd9Sstevel@tonic-gate mutex_exit(&p->p_crlock); 736*7c478bd9Sstevel@tonic-gate 737*7c478bd9Sstevel@tonic-gate if (oldcr != NULL) { 738*7c478bd9Sstevel@tonic-gate cred_t *newcr = crdup(oldcr); 739*7c478bd9Sstevel@tonic-gate crsetprojid(newcr, projid); 740*7c478bd9Sstevel@tonic-gate crfree(oldcr); 741*7c478bd9Sstevel@tonic-gate 742*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_crlock); 743*7c478bd9Sstevel@tonic-gate oldcr = p->p_cred; 744*7c478bd9Sstevel@tonic-gate p->p_cred = newcr; 745*7c478bd9Sstevel@tonic-gate mutex_exit(&p->p_crlock); 746*7c478bd9Sstevel@tonic-gate crfree(oldcr); 747*7c478bd9Sstevel@tonic-gate } 748*7c478bd9Sstevel@tonic-gate 749*7c478bd9Sstevel@tonic-gate /* 750*7c478bd9Sstevel@tonic-gate * Make sure that the number of processor sets is constant 751*7c478bd9Sstevel@tonic-gate * across this operation. 752*7c478bd9Sstevel@tonic-gate */ 753*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); 754*7c478bd9Sstevel@tonic-gate 755*7c478bd9Sstevel@tonic-gate projbuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_PROJ); 756*7c478bd9Sstevel@tonic-gate zonebuf = fss_allocbuf(FSS_NPSET_BUF, FSS_ALLOC_ZONE); 757*7c478bd9Sstevel@tonic-gate 758*7c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 759*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 760*7c478bd9Sstevel@tonic-gate 761*7c478bd9Sstevel@tonic-gate prev_tk = p->p_task; 762*7c478bd9Sstevel@tonic-gate task_change(tk, p); 763*7c478bd9Sstevel@tonic-gate 764*7c478bd9Sstevel@tonic-gate /* 765*7c478bd9Sstevel@tonic-gate * Now move threads one by one to their new project. 
766*7c478bd9Sstevel@tonic-gate */ 767*7c478bd9Sstevel@tonic-gate changeproj(p, tk->tk_proj, zone, projbuf, zonebuf); 768*7c478bd9Sstevel@tonic-gate if (flags & TASK_FINAL) 769*7c478bd9Sstevel@tonic-gate p->p_task->tk_flags |= TASK_FINAL; 770*7c478bd9Sstevel@tonic-gate 771*7c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 772*7c478bd9Sstevel@tonic-gate 773*7c478bd9Sstevel@tonic-gate fss_freebuf(zonebuf, FSS_ALLOC_ZONE); 774*7c478bd9Sstevel@tonic-gate fss_freebuf(projbuf, FSS_ALLOC_PROJ); 775*7c478bd9Sstevel@tonic-gate return (prev_tk); 776*7c478bd9Sstevel@tonic-gate } 777*7c478bd9Sstevel@tonic-gate 778*7c478bd9Sstevel@tonic-gate /* 779*7c478bd9Sstevel@tonic-gate * rctl ops vectors 780*7c478bd9Sstevel@tonic-gate */ 781*7c478bd9Sstevel@tonic-gate static rctl_ops_t task_lwps_ops = { 782*7c478bd9Sstevel@tonic-gate rcop_no_action, 783*7c478bd9Sstevel@tonic-gate task_lwps_usage, 784*7c478bd9Sstevel@tonic-gate task_lwps_set, 785*7c478bd9Sstevel@tonic-gate task_lwps_test 786*7c478bd9Sstevel@tonic-gate }; 787*7c478bd9Sstevel@tonic-gate 788*7c478bd9Sstevel@tonic-gate static rctl_ops_t task_cpu_time_ops = { 789*7c478bd9Sstevel@tonic-gate rcop_no_action, 790*7c478bd9Sstevel@tonic-gate task_cpu_time_usage, 791*7c478bd9Sstevel@tonic-gate rcop_no_set, 792*7c478bd9Sstevel@tonic-gate task_cpu_time_test 793*7c478bd9Sstevel@tonic-gate }; 794*7c478bd9Sstevel@tonic-gate 795*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 796*7c478bd9Sstevel@tonic-gate /* 797*7c478bd9Sstevel@tonic-gate * void task_init(void) 798*7c478bd9Sstevel@tonic-gate * 799*7c478bd9Sstevel@tonic-gate * Overview 800*7c478bd9Sstevel@tonic-gate * task_init() initializes task-related hashes, caches, and the task id 801*7c478bd9Sstevel@tonic-gate * space. Additionally, task_init() establishes p0 as a member of task0. 802*7c478bd9Sstevel@tonic-gate * Called by main(). 803*7c478bd9Sstevel@tonic-gate * 804*7c478bd9Sstevel@tonic-gate * Return values 805*7c478bd9Sstevel@tonic-gate * None. 
806*7c478bd9Sstevel@tonic-gate * 807*7c478bd9Sstevel@tonic-gate * Caller's context 808*7c478bd9Sstevel@tonic-gate * task_init() must be called prior to MP startup. 809*7c478bd9Sstevel@tonic-gate */ 810*7c478bd9Sstevel@tonic-gate void 811*7c478bd9Sstevel@tonic-gate task_init(void) 812*7c478bd9Sstevel@tonic-gate { 813*7c478bd9Sstevel@tonic-gate proc_t *p = &p0; 814*7c478bd9Sstevel@tonic-gate mod_hash_hndl_t hndl; 815*7c478bd9Sstevel@tonic-gate rctl_set_t *set; 816*7c478bd9Sstevel@tonic-gate rctl_alloc_gp_t *gp; 817*7c478bd9Sstevel@tonic-gate rctl_entity_p_t e; 818*7c478bd9Sstevel@tonic-gate /* 819*7c478bd9Sstevel@tonic-gate * Initialize task_cache and taskid_space. 820*7c478bd9Sstevel@tonic-gate */ 821*7c478bd9Sstevel@tonic-gate task_cache = kmem_cache_create("task_cache", sizeof (task_t), 822*7c478bd9Sstevel@tonic-gate 0, NULL, NULL, NULL, NULL, NULL, 0); 823*7c478bd9Sstevel@tonic-gate taskid_space = id_space_create("taskid_space", 0, MAX_TASKID); 824*7c478bd9Sstevel@tonic-gate 825*7c478bd9Sstevel@tonic-gate /* 826*7c478bd9Sstevel@tonic-gate * Initialize task hash table. 827*7c478bd9Sstevel@tonic-gate */ 828*7c478bd9Sstevel@tonic-gate task_hash = mod_hash_create_idhash("task_hash", task_hash_size, 829*7c478bd9Sstevel@tonic-gate mod_hash_null_valdtor); 830*7c478bd9Sstevel@tonic-gate 831*7c478bd9Sstevel@tonic-gate /* 832*7c478bd9Sstevel@tonic-gate * Initialize task-based rctls. 
833*7c478bd9Sstevel@tonic-gate */ 834*7c478bd9Sstevel@tonic-gate rc_task_lwps = rctl_register("task.max-lwps", RCENTITY_TASK, 835*7c478bd9Sstevel@tonic-gate RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX, 836*7c478bd9Sstevel@tonic-gate &task_lwps_ops); 837*7c478bd9Sstevel@tonic-gate rc_task_cpu_time = rctl_register("task.max-cpu-time", RCENTITY_TASK, 838*7c478bd9Sstevel@tonic-gate RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_DENY_NEVER | 839*7c478bd9Sstevel@tonic-gate RCTL_GLOBAL_CPU_TIME | RCTL_GLOBAL_INFINITE | 840*7c478bd9Sstevel@tonic-gate RCTL_GLOBAL_UNOBSERVABLE | RCTL_GLOBAL_SECONDS, UINT64_MAX, 841*7c478bd9Sstevel@tonic-gate UINT64_MAX, &task_cpu_time_ops); 842*7c478bd9Sstevel@tonic-gate 843*7c478bd9Sstevel@tonic-gate /* 844*7c478bd9Sstevel@tonic-gate * Create task0 and place p0 in it as a member. 845*7c478bd9Sstevel@tonic-gate */ 846*7c478bd9Sstevel@tonic-gate task0p = kmem_cache_alloc(task_cache, KM_SLEEP); 847*7c478bd9Sstevel@tonic-gate bzero(task0p, sizeof (task_t)); 848*7c478bd9Sstevel@tonic-gate 849*7c478bd9Sstevel@tonic-gate task0p->tk_tkid = id_alloc(taskid_space); 850*7c478bd9Sstevel@tonic-gate task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP); 851*7c478bd9Sstevel@tonic-gate task0p->tk_proj = project_hold_by_id(0, GLOBAL_ZONEID, 852*7c478bd9Sstevel@tonic-gate PROJECT_HOLD_INSERT); 853*7c478bd9Sstevel@tonic-gate task0p->tk_flags = TASK_NORMAL; 854*7c478bd9Sstevel@tonic-gate task0p->tk_nlwps = p->p_lwpcnt; 855*7c478bd9Sstevel@tonic-gate task0p->tk_zone = global_zone; 856*7c478bd9Sstevel@tonic-gate 857*7c478bd9Sstevel@tonic-gate set = rctl_set_create(); 858*7c478bd9Sstevel@tonic-gate gp = rctl_set_init_prealloc(RCENTITY_TASK); 859*7c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 860*7c478bd9Sstevel@tonic-gate e.rcep_p.task = task0p; 861*7c478bd9Sstevel@tonic-gate e.rcep_t = RCENTITY_TASK; 862*7c478bd9Sstevel@tonic-gate task0p->tk_rctls = rctl_set_init(RCENTITY_TASK, curproc, &e, set, gp); 863*7c478bd9Sstevel@tonic-gate 
mutex_exit(&curproc->p_lock); 864*7c478bd9Sstevel@tonic-gate rctl_prealloc_destroy(gp); 865*7c478bd9Sstevel@tonic-gate 866*7c478bd9Sstevel@tonic-gate (void) mod_hash_reserve(task_hash, &hndl); 867*7c478bd9Sstevel@tonic-gate mutex_enter(&task_hash_lock); 868*7c478bd9Sstevel@tonic-gate ASSERT(task_find(task0p->tk_tkid, GLOBAL_ZONEID) == NULL); 869*7c478bd9Sstevel@tonic-gate if (mod_hash_insert_reserve(task_hash, 870*7c478bd9Sstevel@tonic-gate (mod_hash_key_t)(uintptr_t)task0p->tk_tkid, 871*7c478bd9Sstevel@tonic-gate (mod_hash_val_t *)task0p, hndl) != 0) { 872*7c478bd9Sstevel@tonic-gate mod_hash_cancel(task_hash, &hndl); 873*7c478bd9Sstevel@tonic-gate panic("unable to insert task %d(%p)", task0p->tk_tkid, 874*7c478bd9Sstevel@tonic-gate (void *)task0p); 875*7c478bd9Sstevel@tonic-gate } 876*7c478bd9Sstevel@tonic-gate mutex_exit(&task_hash_lock); 877*7c478bd9Sstevel@tonic-gate 878*7c478bd9Sstevel@tonic-gate task0p->tk_memb_list = p; 879*7c478bd9Sstevel@tonic-gate 880*7c478bd9Sstevel@tonic-gate /* 881*7c478bd9Sstevel@tonic-gate * Initialize task pointers for p0, including doubly linked list of task 882*7c478bd9Sstevel@tonic-gate * members. 883*7c478bd9Sstevel@tonic-gate */ 884*7c478bd9Sstevel@tonic-gate p->p_task = task0p; 885*7c478bd9Sstevel@tonic-gate p->p_taskprev = p->p_tasknext = p; 886*7c478bd9Sstevel@tonic-gate task_hold(task0p); 887*7c478bd9Sstevel@tonic-gate } 888