xref: /titanic_53/usr/src/uts/common/os/pool.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/pool.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/pool_impl.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/pool_pset.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/id_space.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/mutex.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/nvpair.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/fss.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/class.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/exacct.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/utsname.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/procset.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/zone.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
48*7c478bd9Sstevel@tonic-gate 
49*7c478bd9Sstevel@tonic-gate /*
50*7c478bd9Sstevel@tonic-gate  * RESOURCE POOLS
51*7c478bd9Sstevel@tonic-gate  *
 * The resource pools facility brings together process-bindable resources into
53*7c478bd9Sstevel@tonic-gate  * a common abstraction called a pool. Processor sets and other entities can
54*7c478bd9Sstevel@tonic-gate  * be configured, grouped, and labelled such that workload components can be
55*7c478bd9Sstevel@tonic-gate  * associated with a subset of a system's total resources.
56*7c478bd9Sstevel@tonic-gate  *
57*7c478bd9Sstevel@tonic-gate  * When disabled, the pools facility is "invisible".  All processes belong
58*7c478bd9Sstevel@tonic-gate  * to the same pool (pool_default), and processor sets can be managed through
59*7c478bd9Sstevel@tonic-gate  * the old pset() system call.  When enabled, processor sets can only be
60*7c478bd9Sstevel@tonic-gate  * managed via the pools facility.  New pools can be created and associated
61*7c478bd9Sstevel@tonic-gate  * with processor sets.  Processes can be bound to pools which have non-empty
62*7c478bd9Sstevel@tonic-gate  * resource sets.
63*7c478bd9Sstevel@tonic-gate  *
64*7c478bd9Sstevel@tonic-gate  * Locking: pool_lock() protects global pools state and must be called
65*7c478bd9Sstevel@tonic-gate  * before modifying the configuration, or when taking a snapshot of the
66*7c478bd9Sstevel@tonic-gate  * configuration.  If pool_lock_intr() is used, the operation may be
67*7c478bd9Sstevel@tonic-gate  * interrupted by a signal or a request.
68*7c478bd9Sstevel@tonic-gate  *
 * To prevent processes from being rebound between pools while they are
 * in the middle of an operation which affects resource set bindings, such
 * operations must be surrounded by calls to pool_barrier_enter() and
 * pool_barrier_exit().  This mechanism guarantees that such processes will
 * be stopped either at the beginning or at the end of the barrier so that
 * the rebind operation can atomically bind the process and its threads
 * to new resource sets, and then let the process run again.
76*7c478bd9Sstevel@tonic-gate  *
77*7c478bd9Sstevel@tonic-gate  * Lock ordering with respect to other locks is as follows:
78*7c478bd9Sstevel@tonic-gate  *
79*7c478bd9Sstevel@tonic-gate  * 	pool_lock() -> cpu_lock -> pidlock -> p_lock -> pool_barrier_lock
80*7c478bd9Sstevel@tonic-gate  *
81*7c478bd9Sstevel@tonic-gate  * Most static and global variables defined in this file are protected
82*7c478bd9Sstevel@tonic-gate  * by calling pool_lock().
83*7c478bd9Sstevel@tonic-gate  *
84*7c478bd9Sstevel@tonic-gate  * The operation that binds tasks and projects to pools is atomic.  That is,
85*7c478bd9Sstevel@tonic-gate  * either all processes in a given task or a project will be bound to a
86*7c478bd9Sstevel@tonic-gate  * new pool, or (in case of an error) they will be all left bound to the
87*7c478bd9Sstevel@tonic-gate  * old pool. Processes in a given task or a given project can only be bound to
88*7c478bd9Sstevel@tonic-gate  * different pools if they were rebound individually one by one as single
89*7c478bd9Sstevel@tonic-gate  * processes.  Threads or LWPs of the same process do not have pool bindings,
90*7c478bd9Sstevel@tonic-gate  * and are bound to the same resource sets associated with the resource pool
91*7c478bd9Sstevel@tonic-gate  * of that process.
92*7c478bd9Sstevel@tonic-gate  *
93*7c478bd9Sstevel@tonic-gate  * The following picture shows one possible pool configuration with three
94*7c478bd9Sstevel@tonic-gate  * pools and three processor sets.  Note that processor set "foo" is not
95*7c478bd9Sstevel@tonic-gate  * associated with any pools and therefore cannot have any processes
96*7c478bd9Sstevel@tonic-gate  * bound to it.  Two pools (default and foo) are associated with the
97*7c478bd9Sstevel@tonic-gate  * same processor set (default).  Also, note that processes in Task 2
98*7c478bd9Sstevel@tonic-gate  * are bound to different pools.
99*7c478bd9Sstevel@tonic-gate  *
100*7c478bd9Sstevel@tonic-gate  *
101*7c478bd9Sstevel@tonic-gate  *							       Processor Sets
102*7c478bd9Sstevel@tonic-gate  *								+---------+
103*7c478bd9Sstevel@tonic-gate  *		       +--------------+========================>| default |
104*7c478bd9Sstevel@tonic-gate  *		      a|	      |				+---------+
105*7c478bd9Sstevel@tonic-gate  *		      s|	      |				    ||
106*7c478bd9Sstevel@tonic-gate  *		      s|	      |				+---------+
107*7c478bd9Sstevel@tonic-gate  *		      o|	      |				|   foo   |
108*7c478bd9Sstevel@tonic-gate  *		      c|	      |				+---------+
109*7c478bd9Sstevel@tonic-gate  *		      i|	      |				    ||
110*7c478bd9Sstevel@tonic-gate  *		      a|	      |				+---------+
111*7c478bd9Sstevel@tonic-gate  *		      t|	      |			+------>|   bar   |
112*7c478bd9Sstevel@tonic-gate  *		      e|	      |			|	+---------+
113*7c478bd9Sstevel@tonic-gate  *                    d|              |                 |
114*7c478bd9Sstevel@tonic-gate  *                     |              |                 |
115*7c478bd9Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
116*7c478bd9Sstevel@tonic-gate  *     Pools   | default |======|   foo   |======|   bar   |
117*7c478bd9Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
118*7c478bd9Sstevel@tonic-gate  *	           @  @            @              @ @   @
119*7c478bd9Sstevel@tonic-gate  *                b|  |            |              | |   |
120*7c478bd9Sstevel@tonic-gate  *                o|  |            |              | |   |
121*7c478bd9Sstevel@tonic-gate  *                u|  +-----+      |      +-------+ |   +---+
122*7c478bd9Sstevel@tonic-gate  *                n|        |      |      |         |       |
123*7c478bd9Sstevel@tonic-gate  *            ....d|........|......|......|.........|.......|....
124*7c478bd9Sstevel@tonic-gate  *            :    |   ::   |      |      |    ::   |       |   :
125*7c478bd9Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
126*7c478bd9Sstevel@tonic-gate  *  Processes :  | p | :: | p |  | p |  | p |  :: | p |...| p | :
127*7c478bd9Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
128*7c478bd9Sstevel@tonic-gate  *            :........::......................::...............:
129*7c478bd9Sstevel@tonic-gate  *              Task 1            Task 2              Task N
130*7c478bd9Sstevel@tonic-gate  *                 |                 |                  |
131*7c478bd9Sstevel@tonic-gate  *                 |                 |                  |
132*7c478bd9Sstevel@tonic-gate  *                 |  +-----------+  |             +-----------+
133*7c478bd9Sstevel@tonic-gate  *                 +--| Project 1 |--+             | Project N |
134*7c478bd9Sstevel@tonic-gate  *                    +-----------+                +-----------+
135*7c478bd9Sstevel@tonic-gate  *
136*7c478bd9Sstevel@tonic-gate  * This is just an illustration of relationships between processes, tasks,
137*7c478bd9Sstevel@tonic-gate  * projects, pools, and processor sets. New types of resource sets will be
138*7c478bd9Sstevel@tonic-gate  * added in the future.
139*7c478bd9Sstevel@tonic-gate  */
140*7c478bd9Sstevel@tonic-gate 
/*
 * Pools state visible outside this file.
 */
pool_t		*pool_default;	/* default pool which always exists */
int		pool_count;	/* number of pools that currently exist */
int		pool_state;	/* POOL_ENABLED or POOL_DISABLED */
void		*pool_buf;	/* pre-commit snapshot of the pools state */
size_t		pool_bufsz;	/* size of pool_buf */

/*
 * Configuration bookkeeping private to this file.
 */
static hrtime_t	pool_pool_mod;	/* gethrtime() of last pools modification */
static hrtime_t	pool_sys_mod;	/* gethrtime() of last system modification */
static nvlist_t	*pool_sys_prop;	/* system properties ("system.*") */
static id_space_t *pool_ids;	/* ID space pool IDs are allocated from */
static list_t	pool_list;	/* doubly-linked list of all pools */

/*
 * State behind pool_lock()/pool_unlock(), and behind pool_barrier_*().
 */
static kmutex_t		pool_mutex;		/* protects pool_busy_* */
static kcondvar_t	pool_busy_cv;		/* waiting for "pool_lock" */
static kthread_t	*pool_busy_thread;	/* thread holding "pool_lock" */
static kmutex_t		pool_barrier_lock;	/* guards pool_barrier_count */
static kcondvar_t	pool_barrier_cv;	/* signalled at count == 0 */
static int		pool_barrier_count;	/* see pool_barrier_exit() */
157*7c478bd9Sstevel@tonic-gate 
158*7c478bd9Sstevel@tonic-gate /*
159*7c478bd9Sstevel@tonic-gate  * Boot-time pool initialization.
160*7c478bd9Sstevel@tonic-gate  */
161*7c478bd9Sstevel@tonic-gate void
162*7c478bd9Sstevel@tonic-gate pool_init(void)
163*7c478bd9Sstevel@tonic-gate {
164*7c478bd9Sstevel@tonic-gate 	pool_ids = id_space_create("pool_ids", POOL_DEFAULT + 1, POOL_MAXID);
165*7c478bd9Sstevel@tonic-gate 
166*7c478bd9Sstevel@tonic-gate 	/*
167*7c478bd9Sstevel@tonic-gate 	 * Initialize default pool.
168*7c478bd9Sstevel@tonic-gate 	 */
169*7c478bd9Sstevel@tonic-gate 	pool_default = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
170*7c478bd9Sstevel@tonic-gate 	pool_default->pool_id = POOL_DEFAULT;
171*7c478bd9Sstevel@tonic-gate 	list_create(&pool_list, sizeof (pool_t), offsetof(pool_t, pool_link));
172*7c478bd9Sstevel@tonic-gate 	list_insert_head(&pool_list, pool_default);
173*7c478bd9Sstevel@tonic-gate 
174*7c478bd9Sstevel@tonic-gate 	/*
175*7c478bd9Sstevel@tonic-gate 	 * Initialize plugins for resource sets.
176*7c478bd9Sstevel@tonic-gate 	 */
177*7c478bd9Sstevel@tonic-gate 	pool_pset_init();
178*7c478bd9Sstevel@tonic-gate 	pool_count = 1;
179*7c478bd9Sstevel@tonic-gate 	p0.p_pool = pool_default;
180*7c478bd9Sstevel@tonic-gate 	global_zone->zone_pool = pool_default;
181*7c478bd9Sstevel@tonic-gate 	pool_default->pool_ref = 1;
182*7c478bd9Sstevel@tonic-gate }
183*7c478bd9Sstevel@tonic-gate 
184*7c478bd9Sstevel@tonic-gate /*
185*7c478bd9Sstevel@tonic-gate  * Synchronization routines.
186*7c478bd9Sstevel@tonic-gate  *
187*7c478bd9Sstevel@tonic-gate  * pool_lock is only called from syscall-level routines (processor_bind(),
188*7c478bd9Sstevel@tonic-gate  * pset_*(), and /dev/pool ioctls).  The pool "lock" may be held for long
189*7c478bd9Sstevel@tonic-gate  * periods of time, including across sleeping operations, so we allow its
190*7c478bd9Sstevel@tonic-gate  * acquisition to be interruptible.
191*7c478bd9Sstevel@tonic-gate  *
192*7c478bd9Sstevel@tonic-gate  * The current thread that owns the "lock" is stored in the variable
193*7c478bd9Sstevel@tonic-gate  * pool_busy_thread, both to let pool_lock_held() work and to aid debugging.
194*7c478bd9Sstevel@tonic-gate  */
195*7c478bd9Sstevel@tonic-gate void
196*7c478bd9Sstevel@tonic-gate pool_lock(void)
197*7c478bd9Sstevel@tonic-gate {
198*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
199*7c478bd9Sstevel@tonic-gate 	while (pool_busy_thread != NULL)
200*7c478bd9Sstevel@tonic-gate 		cv_wait(&pool_busy_cv, &pool_mutex);
201*7c478bd9Sstevel@tonic-gate 	pool_busy_thread = curthread;
202*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
203*7c478bd9Sstevel@tonic-gate }
204*7c478bd9Sstevel@tonic-gate 
205*7c478bd9Sstevel@tonic-gate int
206*7c478bd9Sstevel@tonic-gate pool_lock_intr(void)
207*7c478bd9Sstevel@tonic-gate {
208*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
209*7c478bd9Sstevel@tonic-gate 	while (pool_busy_thread != NULL) {
210*7c478bd9Sstevel@tonic-gate 		if (cv_wait_sig(&pool_busy_cv, &pool_mutex) == 0) {
211*7c478bd9Sstevel@tonic-gate 			cv_signal(&pool_busy_cv);
212*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_mutex);
213*7c478bd9Sstevel@tonic-gate 			return (1);
214*7c478bd9Sstevel@tonic-gate 		}
215*7c478bd9Sstevel@tonic-gate 	}
216*7c478bd9Sstevel@tonic-gate 	pool_busy_thread = curthread;
217*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
218*7c478bd9Sstevel@tonic-gate 	return (0);
219*7c478bd9Sstevel@tonic-gate }
220*7c478bd9Sstevel@tonic-gate 
221*7c478bd9Sstevel@tonic-gate int
222*7c478bd9Sstevel@tonic-gate pool_lock_held(void)
223*7c478bd9Sstevel@tonic-gate {
224*7c478bd9Sstevel@tonic-gate 	return (pool_busy_thread == curthread);
225*7c478bd9Sstevel@tonic-gate }
226*7c478bd9Sstevel@tonic-gate 
227*7c478bd9Sstevel@tonic-gate void
228*7c478bd9Sstevel@tonic-gate pool_unlock(void)
229*7c478bd9Sstevel@tonic-gate {
230*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
231*7c478bd9Sstevel@tonic-gate 	pool_busy_thread = NULL;
232*7c478bd9Sstevel@tonic-gate 	cv_signal(&pool_busy_cv);
233*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
234*7c478bd9Sstevel@tonic-gate }
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate /*
237*7c478bd9Sstevel@tonic-gate  * Routines allowing fork(), exec(), exit(), and lwp_create() to synchronize
238*7c478bd9Sstevel@tonic-gate  * with pool_do_bind().
239*7c478bd9Sstevel@tonic-gate  *
240*7c478bd9Sstevel@tonic-gate  * Calls to pool_barrier_enter() and pool_barrier_exit() must bracket all
241*7c478bd9Sstevel@tonic-gate  * operations which modify pool or pset associations.  They can be called
242*7c478bd9Sstevel@tonic-gate  * while the process is multi-threaded.  In the common case, when current
243*7c478bd9Sstevel@tonic-gate  * process is not being rebound (PBWAIT flag is not set), these functions
244*7c478bd9Sstevel@tonic-gate  * will be just incrementing and decrementing reference counts.
245*7c478bd9Sstevel@tonic-gate  */
246*7c478bd9Sstevel@tonic-gate void
247*7c478bd9Sstevel@tonic-gate pool_barrier_enter(void)
248*7c478bd9Sstevel@tonic-gate {
249*7c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
250*7c478bd9Sstevel@tonic-gate 
251*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
252*7c478bd9Sstevel@tonic-gate 	while (p->p_poolflag & PBWAIT)
253*7c478bd9Sstevel@tonic-gate 		cv_wait(&p->p_poolcv, &p->p_lock);
254*7c478bd9Sstevel@tonic-gate 	p->p_poolcnt++;
255*7c478bd9Sstevel@tonic-gate }
256*7c478bd9Sstevel@tonic-gate 
257*7c478bd9Sstevel@tonic-gate void
258*7c478bd9Sstevel@tonic-gate pool_barrier_exit(void)
259*7c478bd9Sstevel@tonic-gate {
260*7c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
263*7c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolcnt > 0);
264*7c478bd9Sstevel@tonic-gate 	p->p_poolcnt--;
265*7c478bd9Sstevel@tonic-gate 	if (p->p_poolflag & PBWAIT) {
266*7c478bd9Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
267*7c478bd9Sstevel@tonic-gate 		ASSERT(pool_barrier_count > 0);
268*7c478bd9Sstevel@tonic-gate 		pool_barrier_count--;
269*7c478bd9Sstevel@tonic-gate 		if (pool_barrier_count == 0)
270*7c478bd9Sstevel@tonic-gate 			cv_signal(&pool_barrier_cv);
271*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
272*7c478bd9Sstevel@tonic-gate 		while (p->p_poolflag & PBWAIT)
273*7c478bd9Sstevel@tonic-gate 			cv_wait(&p->p_poolcv, &p->p_lock);
274*7c478bd9Sstevel@tonic-gate 	}
275*7c478bd9Sstevel@tonic-gate }
276*7c478bd9Sstevel@tonic-gate 
277*7c478bd9Sstevel@tonic-gate /*
278*7c478bd9Sstevel@tonic-gate  * Enable pools facility.
279*7c478bd9Sstevel@tonic-gate  */
280*7c478bd9Sstevel@tonic-gate static int
281*7c478bd9Sstevel@tonic-gate pool_enable(void)
282*7c478bd9Sstevel@tonic-gate {
283*7c478bd9Sstevel@tonic-gate 	int ret;
284*7c478bd9Sstevel@tonic-gate 
285*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
286*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_count == 1);
287*7c478bd9Sstevel@tonic-gate 
288*7c478bd9Sstevel@tonic-gate 	ret = pool_pset_enable();
289*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
290*7c478bd9Sstevel@tonic-gate 		return (ret);
291*7c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_sys_prop, NV_UNIQUE_NAME, KM_SLEEP);
292*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.name",
293*7c478bd9Sstevel@tonic-gate 	    utsname.nodename);
294*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.comment", "");
295*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_sys_prop, "system.version", 1);
296*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_sys_prop, "system.bind-default", 1);
297*7c478bd9Sstevel@tonic-gate 
298*7c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_default->pool_props,
299*7c478bd9Sstevel@tonic-gate 	    NV_UNIQUE_NAME, KM_SLEEP);
300*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props,
301*7c478bd9Sstevel@tonic-gate 	    "pool.name", "pool_default");
302*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props, "pool.comment", "");
303*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.default", 1);
304*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.active", 1);
305*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props,
306*7c478bd9Sstevel@tonic-gate 	    "pool.importance", 1);
307*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props, "pool.sys_id",
308*7c478bd9Sstevel@tonic-gate 	    pool_default->pool_id);
309*7c478bd9Sstevel@tonic-gate 
310*7c478bd9Sstevel@tonic-gate 	pool_sys_mod = pool_pool_mod = gethrtime();
311*7c478bd9Sstevel@tonic-gate 
312*7c478bd9Sstevel@tonic-gate 	return (ret);
313*7c478bd9Sstevel@tonic-gate }
314*7c478bd9Sstevel@tonic-gate 
315*7c478bd9Sstevel@tonic-gate /*
316*7c478bd9Sstevel@tonic-gate  * Disable pools facility.
317*7c478bd9Sstevel@tonic-gate  */
318*7c478bd9Sstevel@tonic-gate static int
319*7c478bd9Sstevel@tonic-gate pool_disable(void)
320*7c478bd9Sstevel@tonic-gate {
321*7c478bd9Sstevel@tonic-gate 	int ret;
322*7c478bd9Sstevel@tonic-gate 
323*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	if (pool_count > 1)	/* must destroy all pools first */
326*7c478bd9Sstevel@tonic-gate 		return (EBUSY);
327*7c478bd9Sstevel@tonic-gate 
328*7c478bd9Sstevel@tonic-gate 	ret = pool_pset_disable();
329*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
330*7c478bd9Sstevel@tonic-gate 		return (ret);
331*7c478bd9Sstevel@tonic-gate 	if (pool_sys_prop != NULL) {
332*7c478bd9Sstevel@tonic-gate 		nvlist_free(pool_sys_prop);
333*7c478bd9Sstevel@tonic-gate 		pool_sys_prop = NULL;
334*7c478bd9Sstevel@tonic-gate 	}
335*7c478bd9Sstevel@tonic-gate 	if (pool_default->pool_props != NULL) {
336*7c478bd9Sstevel@tonic-gate 		nvlist_free(pool_default->pool_props);
337*7c478bd9Sstevel@tonic-gate 		pool_default->pool_props = NULL;
338*7c478bd9Sstevel@tonic-gate 	}
339*7c478bd9Sstevel@tonic-gate 	return (0);
340*7c478bd9Sstevel@tonic-gate }
341*7c478bd9Sstevel@tonic-gate 
342*7c478bd9Sstevel@tonic-gate pool_t *
343*7c478bd9Sstevel@tonic-gate pool_lookup_pool_by_name(char *name)
344*7c478bd9Sstevel@tonic-gate {
345*7c478bd9Sstevel@tonic-gate 	pool_t *pool = pool_default;
346*7c478bd9Sstevel@tonic-gate 	char *p;
347*7c478bd9Sstevel@tonic-gate 
348*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
349*7c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
350*7c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
351*7c478bd9Sstevel@tonic-gate 		if (nvlist_lookup_string(pool->pool_props,
352*7c478bd9Sstevel@tonic-gate 		    "pool.name", &p) == 0 && strcmp(name, p) == 0)
353*7c478bd9Sstevel@tonic-gate 			return (pool);
354*7c478bd9Sstevel@tonic-gate 	}
355*7c478bd9Sstevel@tonic-gate 	return (NULL);
356*7c478bd9Sstevel@tonic-gate }
357*7c478bd9Sstevel@tonic-gate 
358*7c478bd9Sstevel@tonic-gate pool_t *
359*7c478bd9Sstevel@tonic-gate pool_lookup_pool_by_id(poolid_t poolid)
360*7c478bd9Sstevel@tonic-gate {
361*7c478bd9Sstevel@tonic-gate 	pool_t *pool = pool_default;
362*7c478bd9Sstevel@tonic-gate 
363*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
364*7c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
365*7c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
366*7c478bd9Sstevel@tonic-gate 		if (pool->pool_id == poolid)
367*7c478bd9Sstevel@tonic-gate 			return (pool);
368*7c478bd9Sstevel@tonic-gate 	}
369*7c478bd9Sstevel@tonic-gate 	return (NULL);
370*7c478bd9Sstevel@tonic-gate }
371*7c478bd9Sstevel@tonic-gate 
372*7c478bd9Sstevel@tonic-gate /*
373*7c478bd9Sstevel@tonic-gate  * Create new pool, associate it with default resource sets, and give
374*7c478bd9Sstevel@tonic-gate  * it a temporary name.
375*7c478bd9Sstevel@tonic-gate  */
376*7c478bd9Sstevel@tonic-gate static int
377*7c478bd9Sstevel@tonic-gate pool_pool_create(poolid_t *poolid)
378*7c478bd9Sstevel@tonic-gate {
379*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
380*7c478bd9Sstevel@tonic-gate 	char pool_name[40];
381*7c478bd9Sstevel@tonic-gate 
382*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
383*7c478bd9Sstevel@tonic-gate 
384*7c478bd9Sstevel@tonic-gate 	pool = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
385*7c478bd9Sstevel@tonic-gate 	pool->pool_id = *poolid = id_alloc(pool_ids);
386*7c478bd9Sstevel@tonic-gate 	pool->pool_pset = pool_pset_default;
387*7c478bd9Sstevel@tonic-gate 	pool_pset_default->pset_npools++;
388*7c478bd9Sstevel@tonic-gate 	list_insert_tail(&pool_list, pool);
389*7c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool->pool_props, NV_UNIQUE_NAME, KM_SLEEP);
390*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool->pool_props, "pool.sys_id", pool->pool_id);
391*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool->pool_props, "pool.default", 0);
392*7c478bd9Sstevel@tonic-gate 	pool_pool_mod = gethrtime();
393*7c478bd9Sstevel@tonic-gate 	(void) snprintf(pool_name, sizeof (pool_name), "pool_%lld",
394*7c478bd9Sstevel@tonic-gate 	    pool_pool_mod);
395*7c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool->pool_props, "pool.name", pool_name);
396*7c478bd9Sstevel@tonic-gate 	pool_count++;
397*7c478bd9Sstevel@tonic-gate 	return (0);
398*7c478bd9Sstevel@tonic-gate }
399*7c478bd9Sstevel@tonic-gate 
/*
 * Argument block passed through zone_walk() to pool_destroy_zone_cb().
 */
struct destroy_zone_arg {
	pool_t *old;	/* pool that zones are being moved away from */
	pool_t *new;	/* pool that those zones get bound to instead */
};
404*7c478bd9Sstevel@tonic-gate 
405*7c478bd9Sstevel@tonic-gate /*
406*7c478bd9Sstevel@tonic-gate  * Update pool pointers for zones that are currently bound to pool "old"
407*7c478bd9Sstevel@tonic-gate  * to be bound to pool "new".
408*7c478bd9Sstevel@tonic-gate  */
409*7c478bd9Sstevel@tonic-gate static int
410*7c478bd9Sstevel@tonic-gate pool_destroy_zone_cb(zone_t *zone, void *arg)
411*7c478bd9Sstevel@tonic-gate {
412*7c478bd9Sstevel@tonic-gate 	struct destroy_zone_arg *dza = arg;
413*7c478bd9Sstevel@tonic-gate 
414*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
415*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
416*7c478bd9Sstevel@tonic-gate 
417*7c478bd9Sstevel@tonic-gate 	if (zone_pool_get(zone) == dza->old)
418*7c478bd9Sstevel@tonic-gate 		zone_pool_set(zone, dza->new);
419*7c478bd9Sstevel@tonic-gate 	return (0);
420*7c478bd9Sstevel@tonic-gate }
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate /*
423*7c478bd9Sstevel@tonic-gate  * Destroy specified pool, and rebind all processes in it
424*7c478bd9Sstevel@tonic-gate  * to the default pool.
425*7c478bd9Sstevel@tonic-gate  */
426*7c478bd9Sstevel@tonic-gate static int
427*7c478bd9Sstevel@tonic-gate pool_pool_destroy(poolid_t poolid)
428*7c478bd9Sstevel@tonic-gate {
429*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
430*7c478bd9Sstevel@tonic-gate 	int ret;
431*7c478bd9Sstevel@tonic-gate 
432*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
433*7c478bd9Sstevel@tonic-gate 
434*7c478bd9Sstevel@tonic-gate 	if (poolid == POOL_DEFAULT)
435*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
436*7c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
437*7c478bd9Sstevel@tonic-gate 		return (ESRCH);
438*7c478bd9Sstevel@tonic-gate 	ret = pool_do_bind(pool_default, P_POOLID, poolid, POOL_BIND_ALL);
439*7c478bd9Sstevel@tonic-gate 	if (ret == 0) {
440*7c478bd9Sstevel@tonic-gate 		struct destroy_zone_arg dzarg;
441*7c478bd9Sstevel@tonic-gate 
442*7c478bd9Sstevel@tonic-gate 		dzarg.old = pool;
443*7c478bd9Sstevel@tonic-gate 		dzarg.new = pool_default;
444*7c478bd9Sstevel@tonic-gate 		mutex_enter(&cpu_lock);
445*7c478bd9Sstevel@tonic-gate 		ret = zone_walk(pool_destroy_zone_cb, &dzarg);
446*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
447*7c478bd9Sstevel@tonic-gate 		ASSERT(ret == 0);
448*7c478bd9Sstevel@tonic-gate 		ASSERT(pool->pool_ref == 0);
449*7c478bd9Sstevel@tonic-gate 		(void) nvlist_free(pool->pool_props);
450*7c478bd9Sstevel@tonic-gate 		id_free(pool_ids, pool->pool_id);
451*7c478bd9Sstevel@tonic-gate 		pool->pool_pset->pset_npools--;
452*7c478bd9Sstevel@tonic-gate 		list_remove(&pool_list, pool);
453*7c478bd9Sstevel@tonic-gate 		pool_count--;
454*7c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
455*7c478bd9Sstevel@tonic-gate 		kmem_free(pool, sizeof (pool_t));
456*7c478bd9Sstevel@tonic-gate 	}
457*7c478bd9Sstevel@tonic-gate 	return (ret);
458*7c478bd9Sstevel@tonic-gate }
459*7c478bd9Sstevel@tonic-gate 
460*7c478bd9Sstevel@tonic-gate /*
461*7c478bd9Sstevel@tonic-gate  * Create new pool or resource set.
462*7c478bd9Sstevel@tonic-gate  */
463*7c478bd9Sstevel@tonic-gate int
464*7c478bd9Sstevel@tonic-gate pool_create(int class, int subclass, id_t *id)
465*7c478bd9Sstevel@tonic-gate {
466*7c478bd9Sstevel@tonic-gate 	int ret;
467*7c478bd9Sstevel@tonic-gate 
468*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
469*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
470*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
471*7c478bd9Sstevel@tonic-gate 	switch (class) {
472*7c478bd9Sstevel@tonic-gate 	case PEC_POOL:
473*7c478bd9Sstevel@tonic-gate 		ret = pool_pool_create((poolid_t *)id);
474*7c478bd9Sstevel@tonic-gate 		break;
475*7c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
476*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
477*7c478bd9Sstevel@tonic-gate 		case PREC_PSET:
478*7c478bd9Sstevel@tonic-gate 			ret = pool_pset_create((psetid_t *)id);
479*7c478bd9Sstevel@tonic-gate 			break;
480*7c478bd9Sstevel@tonic-gate 		default:
481*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
482*7c478bd9Sstevel@tonic-gate 		}
483*7c478bd9Sstevel@tonic-gate 		break;
484*7c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
485*7c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
486*7c478bd9Sstevel@tonic-gate 		break;
487*7c478bd9Sstevel@tonic-gate 	default:
488*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
489*7c478bd9Sstevel@tonic-gate 	}
490*7c478bd9Sstevel@tonic-gate 	return (ret);
491*7c478bd9Sstevel@tonic-gate }
492*7c478bd9Sstevel@tonic-gate 
493*7c478bd9Sstevel@tonic-gate /*
494*7c478bd9Sstevel@tonic-gate  * Destroy an existing pool or resource set.
495*7c478bd9Sstevel@tonic-gate  */
496*7c478bd9Sstevel@tonic-gate int
497*7c478bd9Sstevel@tonic-gate pool_destroy(int class, int subclass, id_t id)
498*7c478bd9Sstevel@tonic-gate {
499*7c478bd9Sstevel@tonic-gate 	int ret;
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
502*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
503*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
504*7c478bd9Sstevel@tonic-gate 	switch (class) {
505*7c478bd9Sstevel@tonic-gate 	case PEC_POOL:
506*7c478bd9Sstevel@tonic-gate 		ret = pool_pool_destroy((poolid_t)id);
507*7c478bd9Sstevel@tonic-gate 		break;
508*7c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
509*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
510*7c478bd9Sstevel@tonic-gate 		case PREC_PSET:
511*7c478bd9Sstevel@tonic-gate 			ret = pool_pset_destroy((psetid_t)id);
512*7c478bd9Sstevel@tonic-gate 			break;
513*7c478bd9Sstevel@tonic-gate 		default:
514*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
515*7c478bd9Sstevel@tonic-gate 		}
516*7c478bd9Sstevel@tonic-gate 		break;
517*7c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
518*7c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
519*7c478bd9Sstevel@tonic-gate 		break;
520*7c478bd9Sstevel@tonic-gate 	default:
521*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
522*7c478bd9Sstevel@tonic-gate 	}
523*7c478bd9Sstevel@tonic-gate 	return (ret);
524*7c478bd9Sstevel@tonic-gate }
525*7c478bd9Sstevel@tonic-gate 
526*7c478bd9Sstevel@tonic-gate /*
527*7c478bd9Sstevel@tonic-gate  * Enable or disable pools.
528*7c478bd9Sstevel@tonic-gate  */
529*7c478bd9Sstevel@tonic-gate int
530*7c478bd9Sstevel@tonic-gate pool_status(int status)
531*7c478bd9Sstevel@tonic-gate {
532*7c478bd9Sstevel@tonic-gate 	int ret = 0;
533*7c478bd9Sstevel@tonic-gate 
534*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
535*7c478bd9Sstevel@tonic-gate 
536*7c478bd9Sstevel@tonic-gate 	if (pool_state == status)
537*7c478bd9Sstevel@tonic-gate 		return (0);
538*7c478bd9Sstevel@tonic-gate 	switch (status) {
539*7c478bd9Sstevel@tonic-gate 	case POOL_ENABLED:
540*7c478bd9Sstevel@tonic-gate 		ret = pool_enable();
541*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
542*7c478bd9Sstevel@tonic-gate 			return (ret);
543*7c478bd9Sstevel@tonic-gate 		pool_state = POOL_ENABLED;
544*7c478bd9Sstevel@tonic-gate 		break;
545*7c478bd9Sstevel@tonic-gate 	case POOL_DISABLED:
546*7c478bd9Sstevel@tonic-gate 		ret = pool_disable();
547*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
548*7c478bd9Sstevel@tonic-gate 			return (ret);
549*7c478bd9Sstevel@tonic-gate 		pool_state = POOL_DISABLED;
550*7c478bd9Sstevel@tonic-gate 		break;
551*7c478bd9Sstevel@tonic-gate 	default:
552*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
553*7c478bd9Sstevel@tonic-gate 	}
554*7c478bd9Sstevel@tonic-gate 	return (ret);
555*7c478bd9Sstevel@tonic-gate }
556*7c478bd9Sstevel@tonic-gate 
557*7c478bd9Sstevel@tonic-gate /*
558*7c478bd9Sstevel@tonic-gate  * Associate pool with resource set.
559*7c478bd9Sstevel@tonic-gate  */
560*7c478bd9Sstevel@tonic-gate int
561*7c478bd9Sstevel@tonic-gate pool_assoc(poolid_t poolid, int idtype, id_t id)
562*7c478bd9Sstevel@tonic-gate {
563*7c478bd9Sstevel@tonic-gate 	int ret;
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
566*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
567*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
568*7c478bd9Sstevel@tonic-gate 	switch (idtype) {
569*7c478bd9Sstevel@tonic-gate 	case PREC_PSET:
570*7c478bd9Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, (psetid_t)id);
571*7c478bd9Sstevel@tonic-gate 		break;
572*7c478bd9Sstevel@tonic-gate 	default:
573*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
574*7c478bd9Sstevel@tonic-gate 	}
575*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
576*7c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
577*7c478bd9Sstevel@tonic-gate 	return (ret);
578*7c478bd9Sstevel@tonic-gate }
579*7c478bd9Sstevel@tonic-gate 
580*7c478bd9Sstevel@tonic-gate /*
581*7c478bd9Sstevel@tonic-gate  * Disassociate resource set from pool.
582*7c478bd9Sstevel@tonic-gate  */
583*7c478bd9Sstevel@tonic-gate int
584*7c478bd9Sstevel@tonic-gate pool_dissoc(poolid_t poolid, int idtype)
585*7c478bd9Sstevel@tonic-gate {
586*7c478bd9Sstevel@tonic-gate 	int ret;
587*7c478bd9Sstevel@tonic-gate 
588*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
589*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
590*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
591*7c478bd9Sstevel@tonic-gate 	switch (idtype) {
592*7c478bd9Sstevel@tonic-gate 	case PREC_PSET:
593*7c478bd9Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, PS_NONE);
594*7c478bd9Sstevel@tonic-gate 		break;
595*7c478bd9Sstevel@tonic-gate 	default:
596*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
597*7c478bd9Sstevel@tonic-gate 	}
598*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
599*7c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
600*7c478bd9Sstevel@tonic-gate 	return (ret);
601*7c478bd9Sstevel@tonic-gate }
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate /*
604*7c478bd9Sstevel@tonic-gate  * Transfer specified quantity of resources between resource sets.
605*7c478bd9Sstevel@tonic-gate  */
606*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
607*7c478bd9Sstevel@tonic-gate int
608*7c478bd9Sstevel@tonic-gate pool_transfer(int type, id_t src, id_t dst, uint64_t qty)
609*7c478bd9Sstevel@tonic-gate {
610*7c478bd9Sstevel@tonic-gate 	int ret = EINVAL;
611*7c478bd9Sstevel@tonic-gate 	return (ret);
612*7c478bd9Sstevel@tonic-gate }
613*7c478bd9Sstevel@tonic-gate 
614*7c478bd9Sstevel@tonic-gate /*
615*7c478bd9Sstevel@tonic-gate  * Transfer resources specified by their IDs between resource sets.
616*7c478bd9Sstevel@tonic-gate  */
617*7c478bd9Sstevel@tonic-gate int
618*7c478bd9Sstevel@tonic-gate pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids)
619*7c478bd9Sstevel@tonic-gate {
620*7c478bd9Sstevel@tonic-gate 	int ret;
621*7c478bd9Sstevel@tonic-gate 
622*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
623*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
624*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
625*7c478bd9Sstevel@tonic-gate 	switch (type) {
626*7c478bd9Sstevel@tonic-gate 	case PREC_PSET:
627*7c478bd9Sstevel@tonic-gate 		ret = pool_pset_xtransfer((psetid_t)src, (psetid_t)dst,
628*7c478bd9Sstevel@tonic-gate 		    size, ids);
629*7c478bd9Sstevel@tonic-gate 		break;
630*7c478bd9Sstevel@tonic-gate 	default:
631*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
632*7c478bd9Sstevel@tonic-gate 	}
633*7c478bd9Sstevel@tonic-gate 	return (ret);
634*7c478bd9Sstevel@tonic-gate }
635*7c478bd9Sstevel@tonic-gate 
636*7c478bd9Sstevel@tonic-gate /*
637*7c478bd9Sstevel@tonic-gate  * Bind processes to pools.
638*7c478bd9Sstevel@tonic-gate  */
639*7c478bd9Sstevel@tonic-gate int
640*7c478bd9Sstevel@tonic-gate pool_bind(poolid_t poolid, idtype_t idtype, id_t id)
641*7c478bd9Sstevel@tonic-gate {
642*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
643*7c478bd9Sstevel@tonic-gate 
644*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
645*7c478bd9Sstevel@tonic-gate 
646*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
647*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
648*7c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
649*7c478bd9Sstevel@tonic-gate 		return (ESRCH);
650*7c478bd9Sstevel@tonic-gate 
651*7c478bd9Sstevel@tonic-gate 	switch (idtype) {
652*7c478bd9Sstevel@tonic-gate 	case P_PID:
653*7c478bd9Sstevel@tonic-gate 	case P_TASKID:
654*7c478bd9Sstevel@tonic-gate 	case P_PROJID:
655*7c478bd9Sstevel@tonic-gate 	case P_ZONEID:
656*7c478bd9Sstevel@tonic-gate 		break;
657*7c478bd9Sstevel@tonic-gate 	default:
658*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
659*7c478bd9Sstevel@tonic-gate 	}
660*7c478bd9Sstevel@tonic-gate 	return (pool_do_bind(pool, idtype, id, POOL_BIND_ALL));
661*7c478bd9Sstevel@tonic-gate }
662*7c478bd9Sstevel@tonic-gate 
663*7c478bd9Sstevel@tonic-gate /*
664*7c478bd9Sstevel@tonic-gate  * Query pool binding of the specifed process.
665*7c478bd9Sstevel@tonic-gate  */
666*7c478bd9Sstevel@tonic-gate int
667*7c478bd9Sstevel@tonic-gate pool_query_binding(idtype_t idtype, id_t id, id_t *poolid)
668*7c478bd9Sstevel@tonic-gate {
669*7c478bd9Sstevel@tonic-gate 	proc_t *p;
670*7c478bd9Sstevel@tonic-gate 
671*7c478bd9Sstevel@tonic-gate 	if (idtype != P_PID)
672*7c478bd9Sstevel@tonic-gate 		return (ENOTSUP);
673*7c478bd9Sstevel@tonic-gate 	if (id == P_MYID)
674*7c478bd9Sstevel@tonic-gate 		id = curproc->p_pid;
675*7c478bd9Sstevel@tonic-gate 
676*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
677*7c478bd9Sstevel@tonic-gate 
678*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
679*7c478bd9Sstevel@tonic-gate 	if ((p = prfind((pid_t)id)) == NULL) {
680*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pidlock);
681*7c478bd9Sstevel@tonic-gate 		return (ESRCH);
682*7c478bd9Sstevel@tonic-gate 	}
683*7c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
684*7c478bd9Sstevel@tonic-gate 	/*
685*7c478bd9Sstevel@tonic-gate 	 * In local zones, lie about pool bindings of processes from
686*7c478bd9Sstevel@tonic-gate 	 * the global zone.
687*7c478bd9Sstevel@tonic-gate 	 */
688*7c478bd9Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc) && INGLOBALZONE(p)) {
689*7c478bd9Sstevel@tonic-gate 		pool_t *pool;
690*7c478bd9Sstevel@tonic-gate 
691*7c478bd9Sstevel@tonic-gate 		pool = zone_pool_get(curproc->p_zone);
692*7c478bd9Sstevel@tonic-gate 		*poolid = pool->pool_id;
693*7c478bd9Sstevel@tonic-gate 	} else {
694*7c478bd9Sstevel@tonic-gate 		*poolid = p->p_pool->pool_id;
695*7c478bd9Sstevel@tonic-gate 	}
696*7c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
697*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
698*7c478bd9Sstevel@tonic-gate 	return (0);
699*7c478bd9Sstevel@tonic-gate }
700*7c478bd9Sstevel@tonic-gate 
701*7c478bd9Sstevel@tonic-gate static ea_object_t *
702*7c478bd9Sstevel@tonic-gate pool_system_pack(void)
703*7c478bd9Sstevel@tonic-gate {
704*7c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
705*7c478bd9Sstevel@tonic-gate 	size_t bufsz = 0;
706*7c478bd9Sstevel@tonic-gate 	char *buf = NULL;
707*7c478bd9Sstevel@tonic-gate 
708*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
709*7c478bd9Sstevel@tonic-gate 
710*7c478bd9Sstevel@tonic-gate 	eo_system = ea_alloc_group(EXT_GROUP | EXC_LOCAL | EXD_GROUP_SYSTEM);
711*7c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_sys_mod, sizeof (hrtime_t),
712*7c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_TSTAMP | EXT_UINT64);
713*7c478bd9Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
714*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_system, &pool_pool_mod,
715*7c478bd9Sstevel@tonic-gate 		    sizeof (hrtime_t),
716*7c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
717*7c478bd9Sstevel@tonic-gate 	else
718*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_system,
719*7c478bd9Sstevel@tonic-gate 		    &curproc->p_zone->zone_pool_mod,
720*7c478bd9Sstevel@tonic-gate 		    sizeof (hrtime_t),
721*7c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
722*7c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_pset_mod, sizeof (hrtime_t),
723*7c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_PSET_TSTAMP | EXT_UINT64);
724*7c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_cpu_mod, sizeof (hrtime_t),
725*7c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_CPU_TSTAMP | EXT_UINT64);
726*7c478bd9Sstevel@tonic-gate 	(void) nvlist_pack(pool_sys_prop, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
727*7c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, buf, bufsz,
728*7c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_PROP | EXT_RAW);
729*7c478bd9Sstevel@tonic-gate 	kmem_free(buf, bufsz);
730*7c478bd9Sstevel@tonic-gate 	return (eo_system);
731*7c478bd9Sstevel@tonic-gate }
732*7c478bd9Sstevel@tonic-gate 
733*7c478bd9Sstevel@tonic-gate /*
734*7c478bd9Sstevel@tonic-gate  * Pack information about pools and attach it to specified exacct group.
735*7c478bd9Sstevel@tonic-gate  */
736*7c478bd9Sstevel@tonic-gate static int
737*7c478bd9Sstevel@tonic-gate pool_pool_pack(ea_object_t *eo_system)
738*7c478bd9Sstevel@tonic-gate {
739*7c478bd9Sstevel@tonic-gate 	ea_object_t *eo_pool;
740*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
741*7c478bd9Sstevel@tonic-gate 	size_t bufsz;
742*7c478bd9Sstevel@tonic-gate 	char *buf;
743*7c478bd9Sstevel@tonic-gate 	pool_t *myzonepool;
744*7c478bd9Sstevel@tonic-gate 
745*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
746*7c478bd9Sstevel@tonic-gate 	myzonepool = zone_pool_get(curproc->p_zone);
747*7c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
748*7c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
749*7c478bd9Sstevel@tonic-gate 		if (!INGLOBALZONE(curproc) && myzonepool != pool)
750*7c478bd9Sstevel@tonic-gate 			continue;
751*7c478bd9Sstevel@tonic-gate 		bufsz = 0;
752*7c478bd9Sstevel@tonic-gate 		buf = NULL;
753*7c478bd9Sstevel@tonic-gate 		eo_pool = ea_alloc_group(EXT_GROUP |
754*7c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_GROUP_POOL);
755*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_id, sizeof (id_t),
756*7c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_POOLID | EXT_UINT32);
757*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_pset->pset_id,
758*7c478bd9Sstevel@tonic-gate 		    sizeof (id_t), EXC_LOCAL | EXD_POOL_PSETID | EXT_UINT32);
759*7c478bd9Sstevel@tonic-gate 		(void) nvlist_pack(pool->pool_props, &buf, &bufsz,
760*7c478bd9Sstevel@tonic-gate 		    NV_ENCODE_NATIVE, 0);
761*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, buf, bufsz,
762*7c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_PROP | EXT_RAW);
763*7c478bd9Sstevel@tonic-gate 		kmem_free(buf, bufsz);
764*7c478bd9Sstevel@tonic-gate 		(void) ea_attach_to_group(eo_system, eo_pool);
765*7c478bd9Sstevel@tonic-gate 	}
766*7c478bd9Sstevel@tonic-gate 	return (0);
767*7c478bd9Sstevel@tonic-gate }
768*7c478bd9Sstevel@tonic-gate 
769*7c478bd9Sstevel@tonic-gate /*
770*7c478bd9Sstevel@tonic-gate  * Pack the whole pool configuration in the specified buffer.
771*7c478bd9Sstevel@tonic-gate  */
772*7c478bd9Sstevel@tonic-gate int
773*7c478bd9Sstevel@tonic-gate pool_pack_conf(void *kbuf, size_t kbufsz, size_t *asize)
774*7c478bd9Sstevel@tonic-gate {
775*7c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
776*7c478bd9Sstevel@tonic-gate 	size_t ksize;
777*7c478bd9Sstevel@tonic-gate 	int ret = 0;
778*7c478bd9Sstevel@tonic-gate 
779*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
780*7c478bd9Sstevel@tonic-gate 
781*7c478bd9Sstevel@tonic-gate 	eo_system = pool_system_pack();		/* 1. pack system */
782*7c478bd9Sstevel@tonic-gate 	(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
783*7c478bd9Sstevel@tonic-gate 	(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
784*7c478bd9Sstevel@tonic-gate 	ksize = ea_pack_object(eo_system, NULL, 0);
785*7c478bd9Sstevel@tonic-gate 	if (kbuf == NULL || kbufsz == 0)
786*7c478bd9Sstevel@tonic-gate 		*asize = ksize;
787*7c478bd9Sstevel@tonic-gate 	else if (ksize > kbufsz)
788*7c478bd9Sstevel@tonic-gate 		ret = ENOMEM;
789*7c478bd9Sstevel@tonic-gate 	else
790*7c478bd9Sstevel@tonic-gate 		*asize = ea_pack_object(eo_system, kbuf, kbufsz);
791*7c478bd9Sstevel@tonic-gate 	ea_free_object(eo_system, EUP_ALLOC);
792*7c478bd9Sstevel@tonic-gate 	return (ret);
793*7c478bd9Sstevel@tonic-gate }
794*7c478bd9Sstevel@tonic-gate 
795*7c478bd9Sstevel@tonic-gate /*
796*7c478bd9Sstevel@tonic-gate  * Start/end the commit transaction.  If commit transaction is currently
797*7c478bd9Sstevel@tonic-gate  * in progress, then all POOL_QUERY ioctls will return pools configuration
798*7c478bd9Sstevel@tonic-gate  * at the beginning of transaction.
799*7c478bd9Sstevel@tonic-gate  */
800*7c478bd9Sstevel@tonic-gate int
801*7c478bd9Sstevel@tonic-gate pool_commit(int state)
802*7c478bd9Sstevel@tonic-gate {
803*7c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
804*7c478bd9Sstevel@tonic-gate 	int ret = 0;
805*7c478bd9Sstevel@tonic-gate 
806*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
807*7c478bd9Sstevel@tonic-gate 
808*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
809*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
810*7c478bd9Sstevel@tonic-gate 	switch (state) {
811*7c478bd9Sstevel@tonic-gate 	case 1:
812*7c478bd9Sstevel@tonic-gate 		/*
813*7c478bd9Sstevel@tonic-gate 		 * Beginning commit transation.
814*7c478bd9Sstevel@tonic-gate 		 */
815*7c478bd9Sstevel@tonic-gate 		if (pool_buf != NULL)		/* transaction in progress */
816*7c478bd9Sstevel@tonic-gate 			return (EBUSY);
817*7c478bd9Sstevel@tonic-gate 		eo_system = pool_system_pack();		/* 1. pack system */
818*7c478bd9Sstevel@tonic-gate 		(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
819*7c478bd9Sstevel@tonic-gate 		(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
820*7c478bd9Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, NULL, 0);
821*7c478bd9Sstevel@tonic-gate 		pool_buf = kmem_alloc(pool_bufsz, KM_SLEEP);
822*7c478bd9Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, pool_buf, pool_bufsz);
823*7c478bd9Sstevel@tonic-gate 		ea_free_object(eo_system, EUP_ALLOC);
824*7c478bd9Sstevel@tonic-gate 		break;
825*7c478bd9Sstevel@tonic-gate 	case 0:
826*7c478bd9Sstevel@tonic-gate 		/*
827*7c478bd9Sstevel@tonic-gate 		 * Finishing commit transaction.
828*7c478bd9Sstevel@tonic-gate 		 */
829*7c478bd9Sstevel@tonic-gate 		if (pool_buf != NULL) {
830*7c478bd9Sstevel@tonic-gate 			kmem_free(pool_buf, pool_bufsz);
831*7c478bd9Sstevel@tonic-gate 			pool_buf = NULL;
832*7c478bd9Sstevel@tonic-gate 			pool_bufsz = 0;
833*7c478bd9Sstevel@tonic-gate 		}
834*7c478bd9Sstevel@tonic-gate 		break;
835*7c478bd9Sstevel@tonic-gate 	default:
836*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
837*7c478bd9Sstevel@tonic-gate 	}
838*7c478bd9Sstevel@tonic-gate 	return (ret);
839*7c478bd9Sstevel@tonic-gate }
840*7c478bd9Sstevel@tonic-gate 
841*7c478bd9Sstevel@tonic-gate /*
842*7c478bd9Sstevel@tonic-gate  * Check is the specified property is special
843*7c478bd9Sstevel@tonic-gate  */
844*7c478bd9Sstevel@tonic-gate static pool_property_t *
845*7c478bd9Sstevel@tonic-gate pool_property_find(char *name, pool_property_t *list)
846*7c478bd9Sstevel@tonic-gate {
847*7c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
848*7c478bd9Sstevel@tonic-gate 
849*7c478bd9Sstevel@tonic-gate 	for (prop = list; prop->pp_name != NULL; prop++)
850*7c478bd9Sstevel@tonic-gate 		if (strcmp(prop->pp_name, name) == 0)
851*7c478bd9Sstevel@tonic-gate 			return (prop);
852*7c478bd9Sstevel@tonic-gate 	return (NULL);
853*7c478bd9Sstevel@tonic-gate }
854*7c478bd9Sstevel@tonic-gate 
855*7c478bd9Sstevel@tonic-gate static pool_property_t pool_prop_sys[] = {
856*7c478bd9Sstevel@tonic-gate 	{ "system.name",		DATA_TYPE_STRING,	PP_RDWR },
857*7c478bd9Sstevel@tonic-gate 	{ "system.comment",		DATA_TYPE_STRING,	PP_RDWR },
858*7c478bd9Sstevel@tonic-gate 	{ "system.version",		DATA_TYPE_UINT64,	PP_READ },
859*7c478bd9Sstevel@tonic-gate 	{ "system.bind-default",	DATA_TYPE_BYTE,		PP_RDWR },
860*7c478bd9Sstevel@tonic-gate 	{ "system.allocate-method",	DATA_TYPE_STRING,
861*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
862*7c478bd9Sstevel@tonic-gate 	{ "system.poold.log-level",	DATA_TYPE_STRING,
863*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
864*7c478bd9Sstevel@tonic-gate 	{ "system.poold.log-location",	DATA_TYPE_STRING,
865*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
866*7c478bd9Sstevel@tonic-gate 	{ "system.poold.monitor-interval",	DATA_TYPE_UINT64,
867*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
868*7c478bd9Sstevel@tonic-gate 	{ "system.poold.history-file",	DATA_TYPE_STRING,
869*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
870*7c478bd9Sstevel@tonic-gate 	{ "system.poold.objectives",	DATA_TYPE_STRING,
871*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
872*7c478bd9Sstevel@tonic-gate 	{ NULL,				0,			0 }
873*7c478bd9Sstevel@tonic-gate };
874*7c478bd9Sstevel@tonic-gate 
875*7c478bd9Sstevel@tonic-gate static pool_property_t pool_prop_pool[] = {
876*7c478bd9Sstevel@tonic-gate 	{ "pool.sys_id",		DATA_TYPE_UINT64,	PP_READ },
877*7c478bd9Sstevel@tonic-gate 	{ "pool.name",			DATA_TYPE_STRING,	PP_RDWR },
878*7c478bd9Sstevel@tonic-gate 	{ "pool.default",		DATA_TYPE_BYTE,		PP_READ },
879*7c478bd9Sstevel@tonic-gate 	{ "pool.active",		DATA_TYPE_BYTE,		PP_RDWR },
880*7c478bd9Sstevel@tonic-gate 	{ "pool.importance",		DATA_TYPE_INT64,	PP_RDWR },
881*7c478bd9Sstevel@tonic-gate 	{ "pool.comment",		DATA_TYPE_STRING,	PP_RDWR },
882*7c478bd9Sstevel@tonic-gate 	{ "pool.scheduler",		DATA_TYPE_STRING,
883*7c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
884*7c478bd9Sstevel@tonic-gate 	{ NULL,				0,			0 }
885*7c478bd9Sstevel@tonic-gate };
886*7c478bd9Sstevel@tonic-gate 
887*7c478bd9Sstevel@tonic-gate /*
888*7c478bd9Sstevel@tonic-gate  * Common routine to put new property on the specified list
889*7c478bd9Sstevel@tonic-gate  */
890*7c478bd9Sstevel@tonic-gate int
891*7c478bd9Sstevel@tonic-gate pool_propput_common(nvlist_t *nvlist, nvpair_t *pair, pool_property_t *props)
892*7c478bd9Sstevel@tonic-gate {
893*7c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
894*7c478bd9Sstevel@tonic-gate 
895*7c478bd9Sstevel@tonic-gate 	if ((prop = pool_property_find(nvpair_name(pair), props)) != NULL) {
896*7c478bd9Sstevel@tonic-gate 		/*
897*7c478bd9Sstevel@tonic-gate 		 * No read-only properties or properties with bad types
898*7c478bd9Sstevel@tonic-gate 		 */
899*7c478bd9Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_WRITE) ||
900*7c478bd9Sstevel@tonic-gate 		    prop->pp_type != nvpair_type(pair))
901*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
902*7c478bd9Sstevel@tonic-gate 	}
903*7c478bd9Sstevel@tonic-gate 	return (nvlist_add_nvpair(nvlist, pair));
904*7c478bd9Sstevel@tonic-gate }
905*7c478bd9Sstevel@tonic-gate 
906*7c478bd9Sstevel@tonic-gate /*
907*7c478bd9Sstevel@tonic-gate  * Common routine to remove property from the given list
908*7c478bd9Sstevel@tonic-gate  */
909*7c478bd9Sstevel@tonic-gate int
910*7c478bd9Sstevel@tonic-gate pool_proprm_common(nvlist_t *nvlist, char *name, pool_property_t *props)
911*7c478bd9Sstevel@tonic-gate {
912*7c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
913*7c478bd9Sstevel@tonic-gate 
914*7c478bd9Sstevel@tonic-gate 	if ((prop = pool_property_find(name, props)) != NULL) {
915*7c478bd9Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_OPTIONAL))
916*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
917*7c478bd9Sstevel@tonic-gate 	}
918*7c478bd9Sstevel@tonic-gate 	return (nvlist_remove_all(nvlist, name));
919*7c478bd9Sstevel@tonic-gate }
920*7c478bd9Sstevel@tonic-gate 
921*7c478bd9Sstevel@tonic-gate static int
922*7c478bd9Sstevel@tonic-gate pool_system_propput(nvpair_t *pair)
923*7c478bd9Sstevel@tonic-gate {
924*7c478bd9Sstevel@tonic-gate 	int ret;
925*7c478bd9Sstevel@tonic-gate 
926*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
927*7c478bd9Sstevel@tonic-gate 	ret = pool_propput_common(pool_sys_prop, pair, pool_prop_sys);
928*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
929*7c478bd9Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
930*7c478bd9Sstevel@tonic-gate 	return (ret);
931*7c478bd9Sstevel@tonic-gate }
932*7c478bd9Sstevel@tonic-gate 
933*7c478bd9Sstevel@tonic-gate static int
934*7c478bd9Sstevel@tonic-gate pool_system_proprm(char *name)
935*7c478bd9Sstevel@tonic-gate {
936*7c478bd9Sstevel@tonic-gate 	int ret;
937*7c478bd9Sstevel@tonic-gate 
938*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
939*7c478bd9Sstevel@tonic-gate 	ret = pool_proprm_common(pool_sys_prop, name, pool_prop_sys);
940*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
941*7c478bd9Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
942*7c478bd9Sstevel@tonic-gate 	return (ret);
943*7c478bd9Sstevel@tonic-gate }
944*7c478bd9Sstevel@tonic-gate 
945*7c478bd9Sstevel@tonic-gate static int
946*7c478bd9Sstevel@tonic-gate pool_pool_propput(poolid_t poolid, nvpair_t *pair)
947*7c478bd9Sstevel@tonic-gate {
948*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
949*7c478bd9Sstevel@tonic-gate 	int ret;
950*7c478bd9Sstevel@tonic-gate 
951*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
952*7c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
953*7c478bd9Sstevel@tonic-gate 		return (ESRCH);
954*7c478bd9Sstevel@tonic-gate 	ret = pool_propput_common(pool->pool_props, pair, pool_prop_pool);
955*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
956*7c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
957*7c478bd9Sstevel@tonic-gate 	return (ret);
958*7c478bd9Sstevel@tonic-gate }
959*7c478bd9Sstevel@tonic-gate 
960*7c478bd9Sstevel@tonic-gate static int
961*7c478bd9Sstevel@tonic-gate pool_pool_proprm(poolid_t poolid, char *name)
962*7c478bd9Sstevel@tonic-gate {
963*7c478bd9Sstevel@tonic-gate 	int ret;
964*7c478bd9Sstevel@tonic-gate 	pool_t *pool;
965*7c478bd9Sstevel@tonic-gate 
966*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
967*7c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
968*7c478bd9Sstevel@tonic-gate 		return (ESRCH);
969*7c478bd9Sstevel@tonic-gate 	ret = pool_proprm_common(pool->pool_props, name, pool_prop_pool);
970*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
971*7c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
972*7c478bd9Sstevel@tonic-gate 	return (ret);
973*7c478bd9Sstevel@tonic-gate }
974*7c478bd9Sstevel@tonic-gate 
975*7c478bd9Sstevel@tonic-gate int
976*7c478bd9Sstevel@tonic-gate pool_propput(int class, int subclass, id_t id, nvpair_t *pair)
977*7c478bd9Sstevel@tonic-gate {
978*7c478bd9Sstevel@tonic-gate 	int ret;
979*7c478bd9Sstevel@tonic-gate 
980*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
981*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
982*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
983*7c478bd9Sstevel@tonic-gate 	switch (class) {
984*7c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
985*7c478bd9Sstevel@tonic-gate 		ret = pool_system_propput(pair);
986*7c478bd9Sstevel@tonic-gate 		break;
987*7c478bd9Sstevel@tonic-gate 	case PEC_POOL:
988*7c478bd9Sstevel@tonic-gate 		ret = pool_pool_propput((poolid_t)id, pair);
989*7c478bd9Sstevel@tonic-gate 		break;
990*7c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
991*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
992*7c478bd9Sstevel@tonic-gate 		case PREC_PSET:
993*7c478bd9Sstevel@tonic-gate 			ret = pool_pset_propput((psetid_t)id, pair);
994*7c478bd9Sstevel@tonic-gate 			break;
995*7c478bd9Sstevel@tonic-gate 		default:
996*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
997*7c478bd9Sstevel@tonic-gate 		}
998*7c478bd9Sstevel@tonic-gate 		break;
999*7c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
1000*7c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
1001*7c478bd9Sstevel@tonic-gate 		break;
1002*7c478bd9Sstevel@tonic-gate 	case PEC_COMP:
1003*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
1004*7c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
1005*7c478bd9Sstevel@tonic-gate 			ret = pool_cpu_propput((processorid_t)id, pair);
1006*7c478bd9Sstevel@tonic-gate 			break;
1007*7c478bd9Sstevel@tonic-gate 		default:
1008*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
1009*7c478bd9Sstevel@tonic-gate 		}
1010*7c478bd9Sstevel@tonic-gate 		break;
1011*7c478bd9Sstevel@tonic-gate 	default:
1012*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
1013*7c478bd9Sstevel@tonic-gate 	}
1014*7c478bd9Sstevel@tonic-gate 	return (ret);
1015*7c478bd9Sstevel@tonic-gate }
1016*7c478bd9Sstevel@tonic-gate 
1017*7c478bd9Sstevel@tonic-gate int
1018*7c478bd9Sstevel@tonic-gate pool_proprm(int class, int subclass, id_t id, char *name)
1019*7c478bd9Sstevel@tonic-gate {
1020*7c478bd9Sstevel@tonic-gate 	int ret;
1021*7c478bd9Sstevel@tonic-gate 
1022*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1023*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
1024*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
1025*7c478bd9Sstevel@tonic-gate 	switch (class) {
1026*7c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
1027*7c478bd9Sstevel@tonic-gate 		ret = pool_system_proprm(name);
1028*7c478bd9Sstevel@tonic-gate 		break;
1029*7c478bd9Sstevel@tonic-gate 	case PEC_POOL:
1030*7c478bd9Sstevel@tonic-gate 		ret = pool_pool_proprm((poolid_t)id, name);
1031*7c478bd9Sstevel@tonic-gate 		break;
1032*7c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
1033*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
1034*7c478bd9Sstevel@tonic-gate 		case PREC_PSET:
1035*7c478bd9Sstevel@tonic-gate 			ret = pool_pset_proprm((psetid_t)id, name);
1036*7c478bd9Sstevel@tonic-gate 			break;
1037*7c478bd9Sstevel@tonic-gate 		default:
1038*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
1039*7c478bd9Sstevel@tonic-gate 		}
1040*7c478bd9Sstevel@tonic-gate 		break;
1041*7c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
1042*7c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
1043*7c478bd9Sstevel@tonic-gate 		break;
1044*7c478bd9Sstevel@tonic-gate 	case PEC_COMP:
1045*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
1046*7c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
1047*7c478bd9Sstevel@tonic-gate 			ret = pool_cpu_proprm((processorid_t)id, name);
1048*7c478bd9Sstevel@tonic-gate 			break;
1049*7c478bd9Sstevel@tonic-gate 		default:
1050*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
1051*7c478bd9Sstevel@tonic-gate 		}
1052*7c478bd9Sstevel@tonic-gate 		break;
1053*7c478bd9Sstevel@tonic-gate 	default:
1054*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
1055*7c478bd9Sstevel@tonic-gate 	}
1056*7c478bd9Sstevel@tonic-gate 	return (ret);
1057*7c478bd9Sstevel@tonic-gate }
1058*7c478bd9Sstevel@tonic-gate 
1059*7c478bd9Sstevel@tonic-gate int
1060*7c478bd9Sstevel@tonic-gate pool_propget(char *name, int class, int subclass, id_t id, nvlist_t **nvlp)
1061*7c478bd9Sstevel@tonic-gate {
1062*7c478bd9Sstevel@tonic-gate 	int ret;
1063*7c478bd9Sstevel@tonic-gate 	nvlist_t *nvl;
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1066*7c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
1067*7c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
1068*7c478bd9Sstevel@tonic-gate 
1069*7c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
1070*7c478bd9Sstevel@tonic-gate 
1071*7c478bd9Sstevel@tonic-gate 	switch (class) {
1072*7c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
1073*7c478bd9Sstevel@tonic-gate 	case PEC_POOL:
1074*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
1075*7c478bd9Sstevel@tonic-gate 		break;
1076*7c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
1077*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
1078*7c478bd9Sstevel@tonic-gate 		case PREC_PSET:
1079*7c478bd9Sstevel@tonic-gate 			ret = pool_pset_propget((psetid_t)id, name, nvl);
1080*7c478bd9Sstevel@tonic-gate 			break;
1081*7c478bd9Sstevel@tonic-gate 		default:
1082*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
1083*7c478bd9Sstevel@tonic-gate 		}
1084*7c478bd9Sstevel@tonic-gate 		break;
1085*7c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
1086*7c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
1087*7c478bd9Sstevel@tonic-gate 		break;
1088*7c478bd9Sstevel@tonic-gate 	case PEC_COMP:
1089*7c478bd9Sstevel@tonic-gate 		switch (subclass) {
1090*7c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
1091*7c478bd9Sstevel@tonic-gate 			ret = pool_cpu_propget((processorid_t)id, name, nvl);
1092*7c478bd9Sstevel@tonic-gate 			break;
1093*7c478bd9Sstevel@tonic-gate 		default:
1094*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
1095*7c478bd9Sstevel@tonic-gate 		}
1096*7c478bd9Sstevel@tonic-gate 		break;
1097*7c478bd9Sstevel@tonic-gate 	default:
1098*7c478bd9Sstevel@tonic-gate 		ret = EINVAL;
1099*7c478bd9Sstevel@tonic-gate 	}
1100*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
1101*7c478bd9Sstevel@tonic-gate 		*nvlp = nvl;
1102*7c478bd9Sstevel@tonic-gate 	else
1103*7c478bd9Sstevel@tonic-gate 		nvlist_free(nvl);
1104*7c478bd9Sstevel@tonic-gate 	return (ret);
1105*7c478bd9Sstevel@tonic-gate }
1106*7c478bd9Sstevel@tonic-gate 
1107*7c478bd9Sstevel@tonic-gate /*
1108*7c478bd9Sstevel@tonic-gate  * pool_bind_wake and pool_bind_wakeall are helper functions to undo PBWAITs
1109*7c478bd9Sstevel@tonic-gate  * in case of failure in pool_do_bind().
1110*7c478bd9Sstevel@tonic-gate  */
1111*7c478bd9Sstevel@tonic-gate static void
1112*7c478bd9Sstevel@tonic-gate pool_bind_wake(proc_t *p)
1113*7c478bd9Sstevel@tonic-gate {
1114*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1115*7c478bd9Sstevel@tonic-gate 
1116*7c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
1117*7c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
1118*7c478bd9Sstevel@tonic-gate 	if (p->p_poolcnt > 0) {
1119*7c478bd9Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
1120*7c478bd9Sstevel@tonic-gate 		pool_barrier_count -= p->p_poolcnt;
1121*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
1122*7c478bd9Sstevel@tonic-gate 	}
1123*7c478bd9Sstevel@tonic-gate 	p->p_poolflag &= ~PBWAIT;
1124*7c478bd9Sstevel@tonic-gate 	cv_signal(&p->p_poolcv);
1125*7c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
1126*7c478bd9Sstevel@tonic-gate }
1127*7c478bd9Sstevel@tonic-gate 
1128*7c478bd9Sstevel@tonic-gate static void
1129*7c478bd9Sstevel@tonic-gate pool_bind_wakeall(proc_t **procs)
1130*7c478bd9Sstevel@tonic-gate {
1131*7c478bd9Sstevel@tonic-gate 	proc_t *p, **pp;
1132*7c478bd9Sstevel@tonic-gate 
1133*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1134*7c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++)
1135*7c478bd9Sstevel@tonic-gate 		pool_bind_wake(p);
1136*7c478bd9Sstevel@tonic-gate }
1137*7c478bd9Sstevel@tonic-gate 
1138*7c478bd9Sstevel@tonic-gate /*
1139*7c478bd9Sstevel@tonic-gate  * Return the scheduling class for this pool, or
1140*7c478bd9Sstevel@tonic-gate  * 	POOL_CLASS_UNSET if not set
1141*7c478bd9Sstevel@tonic-gate  * 	POOL_CLASS_INVAL if set to an invalid class ID.
1142*7c478bd9Sstevel@tonic-gate  */
1143*7c478bd9Sstevel@tonic-gate id_t
1144*7c478bd9Sstevel@tonic-gate pool_get_class(pool_t *pool)
1145*7c478bd9Sstevel@tonic-gate {
1146*7c478bd9Sstevel@tonic-gate 	char *name;
1147*7c478bd9Sstevel@tonic-gate 	id_t cid;
1148*7c478bd9Sstevel@tonic-gate 
1149*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1150*7c478bd9Sstevel@tonic-gate 
1151*7c478bd9Sstevel@tonic-gate 	if (nvlist_lookup_string(pool->pool_props, "pool.scheduler",
1152*7c478bd9Sstevel@tonic-gate 	    &name) == 0) {
1153*7c478bd9Sstevel@tonic-gate 		if (getcidbyname(name, &cid) == 0)
1154*7c478bd9Sstevel@tonic-gate 			return (cid);
1155*7c478bd9Sstevel@tonic-gate 		else
1156*7c478bd9Sstevel@tonic-gate 			return (POOL_CLASS_INVAL);
1157*7c478bd9Sstevel@tonic-gate 	}
1158*7c478bd9Sstevel@tonic-gate 	return (POOL_CLASS_UNSET);
1159*7c478bd9Sstevel@tonic-gate }
1160*7c478bd9Sstevel@tonic-gate 
1161*7c478bd9Sstevel@tonic-gate /*
1162*7c478bd9Sstevel@tonic-gate  * Move process to the new scheduling class.
1163*7c478bd9Sstevel@tonic-gate  */
1164*7c478bd9Sstevel@tonic-gate static void
1165*7c478bd9Sstevel@tonic-gate pool_change_class(proc_t *p, id_t cid)
1166*7c478bd9Sstevel@tonic-gate {
1167*7c478bd9Sstevel@tonic-gate 	kthread_t *t;
1168*7c478bd9Sstevel@tonic-gate 	void *cldata;
1169*7c478bd9Sstevel@tonic-gate 	id_t oldcid;
1170*7c478bd9Sstevel@tonic-gate 	void **bufs;
1171*7c478bd9Sstevel@tonic-gate 	void **buf;
1172*7c478bd9Sstevel@tonic-gate 	int nlwp;
1173*7c478bd9Sstevel@tonic-gate 	int ret;
1174*7c478bd9Sstevel@tonic-gate 	int i;
1175*7c478bd9Sstevel@tonic-gate 
1176*7c478bd9Sstevel@tonic-gate 	/*
1177*7c478bd9Sstevel@tonic-gate 	 * Do not move kernel processes (such as zsched).
1178*7c478bd9Sstevel@tonic-gate 	 */
1179*7c478bd9Sstevel@tonic-gate 	if (p->p_flag & SSYS)
1180*7c478bd9Sstevel@tonic-gate 		return;
1181*7c478bd9Sstevel@tonic-gate 	/*
1182*7c478bd9Sstevel@tonic-gate 	 * This process is in the pool barrier, so it can't possibly be
1183*7c478bd9Sstevel@tonic-gate 	 * adding new threads and we can use p_lwpcnt + p_zombcnt + 1
1184*7c478bd9Sstevel@tonic-gate 	 * (for possible agent LWP which doesn't use pool barrier) as
1185*7c478bd9Sstevel@tonic-gate 	 * our upper bound.
1186*7c478bd9Sstevel@tonic-gate 	 */
1187*7c478bd9Sstevel@tonic-gate 	nlwp = p->p_lwpcnt + p->p_zombcnt + 1;
1188*7c478bd9Sstevel@tonic-gate 
1189*7c478bd9Sstevel@tonic-gate 	/*
1190*7c478bd9Sstevel@tonic-gate 	 * Pre-allocate scheduling class specific buffers before
1191*7c478bd9Sstevel@tonic-gate 	 * grabbing p_lock.
1192*7c478bd9Sstevel@tonic-gate 	 */
1193*7c478bd9Sstevel@tonic-gate 	bufs = kmem_zalloc(nlwp * sizeof (void *), KM_SLEEP);
1194*7c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
1195*7c478bd9Sstevel@tonic-gate 		ret = CL_ALLOC(buf, cid, KM_SLEEP);
1196*7c478bd9Sstevel@tonic-gate 		ASSERT(ret == 0);
1197*7c478bd9Sstevel@tonic-gate 	}
1198*7c478bd9Sstevel@tonic-gate 
1199*7c478bd9Sstevel@tonic-gate 	/*
1200*7c478bd9Sstevel@tonic-gate 	 * Move threads one by one to the new scheduling class.
1201*7c478bd9Sstevel@tonic-gate 	 * This never fails because we have all the right
1202*7c478bd9Sstevel@tonic-gate 	 * privileges here.
1203*7c478bd9Sstevel@tonic-gate 	 */
1204*7c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
1205*7c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
1206*7c478bd9Sstevel@tonic-gate 	buf = bufs;
1207*7c478bd9Sstevel@tonic-gate 	t = p->p_tlist;
1208*7c478bd9Sstevel@tonic-gate 	ASSERT(t != NULL);
1209*7c478bd9Sstevel@tonic-gate 	do {
1210*7c478bd9Sstevel@tonic-gate 		if (t->t_cid != cid) {
1211*7c478bd9Sstevel@tonic-gate 			oldcid = t->t_cid;
1212*7c478bd9Sstevel@tonic-gate 			cldata = t->t_cldata;
1213*7c478bd9Sstevel@tonic-gate 			ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf);
1214*7c478bd9Sstevel@tonic-gate 			ASSERT(ret == 0);
1215*7c478bd9Sstevel@tonic-gate 			CL_EXITCLASS(oldcid, cldata);
1216*7c478bd9Sstevel@tonic-gate 			*buf++ = NULL;
1217*7c478bd9Sstevel@tonic-gate 		}
1218*7c478bd9Sstevel@tonic-gate 	} while ((t = t->t_forw) != p->p_tlist);
1219*7c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
1220*7c478bd9Sstevel@tonic-gate 	/*
1221*7c478bd9Sstevel@tonic-gate 	 * Free unused scheduling class specific buffers.
1222*7c478bd9Sstevel@tonic-gate 	 */
1223*7c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
1224*7c478bd9Sstevel@tonic-gate 		if (*buf != NULL) {
1225*7c478bd9Sstevel@tonic-gate 			CL_FREE(cid, *buf);
1226*7c478bd9Sstevel@tonic-gate 			*buf = NULL;
1227*7c478bd9Sstevel@tonic-gate 		}
1228*7c478bd9Sstevel@tonic-gate 	}
1229*7c478bd9Sstevel@tonic-gate 	kmem_free(bufs, nlwp * sizeof (void *));
1230*7c478bd9Sstevel@tonic-gate }
1231*7c478bd9Sstevel@tonic-gate 
1232*7c478bd9Sstevel@tonic-gate /*
1233*7c478bd9Sstevel@tonic-gate  * The meat of the bind operation.  The steps in pool_do_bind are:
1234*7c478bd9Sstevel@tonic-gate  *
1235*7c478bd9Sstevel@tonic-gate  * 1) Set PBWAIT in the p_poolflag of any process of interest, and add all
1236*7c478bd9Sstevel@tonic-gate  *    such processes to an array.  For any interesting process that has
1237*7c478bd9Sstevel@tonic-gate  *    threads inside the pool barrier set, increment a counter by the
1238*7c478bd9Sstevel@tonic-gate  *    count of such threads.  Once PBWAIT is set on a process, that process
1239*7c478bd9Sstevel@tonic-gate  *    will not disappear.
1240*7c478bd9Sstevel@tonic-gate  *
1241*7c478bd9Sstevel@tonic-gate  * 2) Wait for the counter from step 2 to drop to zero.  Any process which
1242*7c478bd9Sstevel@tonic-gate  *    calls pool_barrier_exit() and notices that PBWAIT has been set on it
1243*7c478bd9Sstevel@tonic-gate  *    will decrement that counter before going to sleep, and the process
1244*7c478bd9Sstevel@tonic-gate  *    calling pool_barrier_exit() which does the final decrement will wake us.
1245*7c478bd9Sstevel@tonic-gate  *
1246*7c478bd9Sstevel@tonic-gate  * 3) For each interesting process, perform a calculation on it to see if
1247*7c478bd9Sstevel@tonic-gate  *    the bind will actually succeed.  This uses the following three
1248*7c478bd9Sstevel@tonic-gate  *    resource-set-specific functions:
1249*7c478bd9Sstevel@tonic-gate  *
1250*7c478bd9Sstevel@tonic-gate  *    - int set_bind_start(procs, pool)
1251*7c478bd9Sstevel@tonic-gate  *
1252*7c478bd9Sstevel@tonic-gate  *      Determine whether the given array of processes can be bound to the
1253*7c478bd9Sstevel@tonic-gate  *      resource set associated with the given pool.  If it can, take and hold
1254*7c478bd9Sstevel@tonic-gate  *      any locks necessary to ensure that the operation will succeed, and
1255*7c478bd9Sstevel@tonic-gate  *      make any necessary reservations in the target resource set.  If it
1256*7c478bd9Sstevel@tonic-gate  *      can't, return failure with no reservations made and no new locks held.
1257*7c478bd9Sstevel@tonic-gate  *
1258*7c478bd9Sstevel@tonic-gate  *    - void set_bind_abort(procs, pool)
1259*7c478bd9Sstevel@tonic-gate  *
1260*7c478bd9Sstevel@tonic-gate  *      set_bind_start() has completed successfully, but another resource set's
1261*7c478bd9Sstevel@tonic-gate  *      set_bind_start() has failed, and we haven't begun the bind yet.  Undo
1262*7c478bd9Sstevel@tonic-gate  *      any reservations made and drop any locks acquired by our
1263*7c478bd9Sstevel@tonic-gate  *      set_bind_start().
1264*7c478bd9Sstevel@tonic-gate  *
1265*7c478bd9Sstevel@tonic-gate  *    - void set_bind_finish(void)
1266*7c478bd9Sstevel@tonic-gate  *
1267*7c478bd9Sstevel@tonic-gate  *      The bind has completed successfully.  The processes have been released,
1268*7c478bd9Sstevel@tonic-gate  *      and the reservation acquired in set_bind_start() has been depleted as
1269*7c478bd9Sstevel@tonic-gate  *      the processes have finished their bindings.  Drop any locks acquired by
1270*7c478bd9Sstevel@tonic-gate  *      set_bind_start().
1271*7c478bd9Sstevel@tonic-gate  *
1272*7c478bd9Sstevel@tonic-gate  * 4) If we've decided that we can proceed with the bind, iterate through
1273*7c478bd9Sstevel@tonic-gate  *    the list of interesting processes, grab the necessary locks (which
1274*7c478bd9Sstevel@tonic-gate  *    may differ per resource set), perform the bind, and ASSERT that it
1275*7c478bd9Sstevel@tonic-gate  *    succeeds.  Once a process has been rebound, it can be awakened.
1276*7c478bd9Sstevel@tonic-gate  *
1277*7c478bd9Sstevel@tonic-gate  * The operations from step 4 must be kept in sync with anything which might
1278*7c478bd9Sstevel@tonic-gate  * cause the bind operations (e.g., cpupart_bind_thread()) to fail, and
1279*7c478bd9Sstevel@tonic-gate  * are thus located in the same source files as the associated bind operations.
1280*7c478bd9Sstevel@tonic-gate  */
1281*7c478bd9Sstevel@tonic-gate int
1282*7c478bd9Sstevel@tonic-gate pool_do_bind(pool_t *pool, idtype_t idtype, id_t id, int flags)
1283*7c478bd9Sstevel@tonic-gate {
1284*7c478bd9Sstevel@tonic-gate 	extern uint_t nproc;
1285*7c478bd9Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(curthread);
1286*7c478bd9Sstevel@tonic-gate 	proc_t **pp, **procs;
1287*7c478bd9Sstevel@tonic-gate 	proc_t *prstart;
1288*7c478bd9Sstevel@tonic-gate 	int procs_count = 0;
1289*7c478bd9Sstevel@tonic-gate 	kproject_t *kpj;
1290*7c478bd9Sstevel@tonic-gate 	procset_t set;
1291*7c478bd9Sstevel@tonic-gate 	zone_t *zone;
1292*7c478bd9Sstevel@tonic-gate 	int procs_size;
1293*7c478bd9Sstevel@tonic-gate 	int rv = 0;
1294*7c478bd9Sstevel@tonic-gate 	proc_t *p;
1295*7c478bd9Sstevel@tonic-gate 	id_t cid = -1;
1296*7c478bd9Sstevel@tonic-gate 
1297*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1298*7c478bd9Sstevel@tonic-gate 
1299*7c478bd9Sstevel@tonic-gate 	if ((cid = pool_get_class(pool)) == POOL_CLASS_INVAL)
1300*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1301*7c478bd9Sstevel@tonic-gate 
1302*7c478bd9Sstevel@tonic-gate 	if (idtype == P_ZONEID) {
1303*7c478bd9Sstevel@tonic-gate 		zone = zone_find_by_id(id);
1304*7c478bd9Sstevel@tonic-gate 		if (zone == NULL)
1305*7c478bd9Sstevel@tonic-gate 			return (ESRCH);
1306*7c478bd9Sstevel@tonic-gate 		if (zone_status_get(zone) > ZONE_IS_RUNNING) {
1307*7c478bd9Sstevel@tonic-gate 			zone_rele(zone);
1308*7c478bd9Sstevel@tonic-gate 			return (EBUSY);
1309*7c478bd9Sstevel@tonic-gate 		}
1310*7c478bd9Sstevel@tonic-gate 	}
1311*7c478bd9Sstevel@tonic-gate 
1312*7c478bd9Sstevel@tonic-gate 	if (idtype == P_PROJID) {
1313*7c478bd9Sstevel@tonic-gate 		kpj = project_hold_by_id(id, GLOBAL_ZONEID, PROJECT_HOLD_FIND);
1314*7c478bd9Sstevel@tonic-gate 		if (kpj == NULL)
1315*7c478bd9Sstevel@tonic-gate 			return (ESRCH);
1316*7c478bd9Sstevel@tonic-gate 		mutex_enter(&kpj->kpj_poolbind);
1317*7c478bd9Sstevel@tonic-gate 	}
1318*7c478bd9Sstevel@tonic-gate 
1319*7c478bd9Sstevel@tonic-gate 	if (idtype == P_PID) {
1320*7c478bd9Sstevel@tonic-gate 		/*
1321*7c478bd9Sstevel@tonic-gate 		 * Fast-path for a single process case.
1322*7c478bd9Sstevel@tonic-gate 		 */
1323*7c478bd9Sstevel@tonic-gate 		procs_size = 2;	/* procs is NULL-terminated */
1324*7c478bd9Sstevel@tonic-gate 		procs = kmem_zalloc(procs_size * sizeof (proc_t *), KM_SLEEP);
1325*7c478bd9Sstevel@tonic-gate 		mutex_enter(&pidlock);
1326*7c478bd9Sstevel@tonic-gate 	} else {
1327*7c478bd9Sstevel@tonic-gate 		/*
1328*7c478bd9Sstevel@tonic-gate 		 * We will need enough slots for proc_t pointers for as many as
1329*7c478bd9Sstevel@tonic-gate 		 * twice the number of currently running processes (assuming
1330*7c478bd9Sstevel@tonic-gate 		 * that each one could be in fork() creating a new child).
1331*7c478bd9Sstevel@tonic-gate 		 */
1332*7c478bd9Sstevel@tonic-gate 		for (;;) {
1333*7c478bd9Sstevel@tonic-gate 			procs_size = nproc * 2;
1334*7c478bd9Sstevel@tonic-gate 			procs = kmem_zalloc(procs_size * sizeof (proc_t *),
1335*7c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
1336*7c478bd9Sstevel@tonic-gate 			mutex_enter(&pidlock);
1337*7c478bd9Sstevel@tonic-gate 
1338*7c478bd9Sstevel@tonic-gate 			if (nproc * 2 <= procs_size)
1339*7c478bd9Sstevel@tonic-gate 				break;
1340*7c478bd9Sstevel@tonic-gate 			/*
1341*7c478bd9Sstevel@tonic-gate 			 * If nproc has changed, try again.
1342*7c478bd9Sstevel@tonic-gate 			 */
1343*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pidlock);
1344*7c478bd9Sstevel@tonic-gate 			kmem_free(procs, procs_size * sizeof (proc_t *));
1345*7c478bd9Sstevel@tonic-gate 		}
1346*7c478bd9Sstevel@tonic-gate 	}
1347*7c478bd9Sstevel@tonic-gate 
1348*7c478bd9Sstevel@tonic-gate 	if (id == P_MYID)
1349*7c478bd9Sstevel@tonic-gate 		id = getmyid(idtype);
1350*7c478bd9Sstevel@tonic-gate 	setprocset(&set, POP_AND, idtype, id, P_ALL, 0);
1351*7c478bd9Sstevel@tonic-gate 
1352*7c478bd9Sstevel@tonic-gate 	/*
1353*7c478bd9Sstevel@tonic-gate 	 * Do a first scan, and select target processes.
1354*7c478bd9Sstevel@tonic-gate 	 */
1355*7c478bd9Sstevel@tonic-gate 	if (idtype == P_PID)
1356*7c478bd9Sstevel@tonic-gate 		prstart = prfind(id);
1357*7c478bd9Sstevel@tonic-gate 	else
1358*7c478bd9Sstevel@tonic-gate 		prstart = practive;
1359*7c478bd9Sstevel@tonic-gate 	for (p = prstart, pp = procs; p != NULL; p = p->p_next) {
1360*7c478bd9Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
1361*7c478bd9Sstevel@tonic-gate 		/*
1362*7c478bd9Sstevel@tonic-gate 		 * Skip processes that don't match our (id, idtype) set or
1363*7c478bd9Sstevel@tonic-gate 		 * on the way of becoming zombies.  Skip kernel processes
1364*7c478bd9Sstevel@tonic-gate 		 * from the global zone.
1365*7c478bd9Sstevel@tonic-gate 		 */
1366*7c478bd9Sstevel@tonic-gate 		if (procinset(p, &set) == 0 ||
1367*7c478bd9Sstevel@tonic-gate 		    p->p_poolflag & PEXITED ||
1368*7c478bd9Sstevel@tonic-gate 		    ((p->p_flag & SSYS) && INGLOBALZONE(p))) {
1369*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
1370*7c478bd9Sstevel@tonic-gate 			continue;
1371*7c478bd9Sstevel@tonic-gate 		}
1372*7c478bd9Sstevel@tonic-gate 		if (!INGLOBALZONE(p)) {
1373*7c478bd9Sstevel@tonic-gate 			switch (idtype) {
1374*7c478bd9Sstevel@tonic-gate 			case P_PID:
1375*7c478bd9Sstevel@tonic-gate 			case P_TASKID:
1376*7c478bd9Sstevel@tonic-gate 				/*
1377*7c478bd9Sstevel@tonic-gate 				 * Can't bind processes or tasks
1378*7c478bd9Sstevel@tonic-gate 				 * in local zones to pools.
1379*7c478bd9Sstevel@tonic-gate 				 */
1380*7c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
1381*7c478bd9Sstevel@tonic-gate 				mutex_exit(&pidlock);
1382*7c478bd9Sstevel@tonic-gate 				pool_bind_wakeall(procs);
1383*7c478bd9Sstevel@tonic-gate 				rv = EINVAL;
1384*7c478bd9Sstevel@tonic-gate 				goto out;
1385*7c478bd9Sstevel@tonic-gate 			case P_PROJID:
1386*7c478bd9Sstevel@tonic-gate 				/*
1387*7c478bd9Sstevel@tonic-gate 				 * Only projects in the global
1388*7c478bd9Sstevel@tonic-gate 				 * zone can be rebound.
1389*7c478bd9Sstevel@tonic-gate 				 */
1390*7c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
1391*7c478bd9Sstevel@tonic-gate 				continue;
1392*7c478bd9Sstevel@tonic-gate 			case P_POOLID:
1393*7c478bd9Sstevel@tonic-gate 				/*
1394*7c478bd9Sstevel@tonic-gate 				 * When rebinding pools, processes can be
1395*7c478bd9Sstevel@tonic-gate 				 * in different zones.
1396*7c478bd9Sstevel@tonic-gate 				 */
1397*7c478bd9Sstevel@tonic-gate 				break;
1398*7c478bd9Sstevel@tonic-gate 			}
1399*7c478bd9Sstevel@tonic-gate 		}
1400*7c478bd9Sstevel@tonic-gate 
1401*7c478bd9Sstevel@tonic-gate 		p->p_poolflag |= PBWAIT;
1402*7c478bd9Sstevel@tonic-gate 		/*
1403*7c478bd9Sstevel@tonic-gate 		 * If some threads in this process are inside the pool
1404*7c478bd9Sstevel@tonic-gate 		 * barrier, add them to pool_barrier_count, as we have
1405*7c478bd9Sstevel@tonic-gate 		 * to wait for all of them to exit the barrier.
1406*7c478bd9Sstevel@tonic-gate 		 */
1407*7c478bd9Sstevel@tonic-gate 		if (p->p_poolcnt > 0) {
1408*7c478bd9Sstevel@tonic-gate 			mutex_enter(&pool_barrier_lock);
1409*7c478bd9Sstevel@tonic-gate 			pool_barrier_count += p->p_poolcnt;
1410*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
1411*7c478bd9Sstevel@tonic-gate 		}
1412*7c478bd9Sstevel@tonic-gate 		ASSERT(pp < &procs[procs_size]);
1413*7c478bd9Sstevel@tonic-gate 		*pp++ = p;
1414*7c478bd9Sstevel@tonic-gate 		procs_count++;
1415*7c478bd9Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
1416*7c478bd9Sstevel@tonic-gate 
1417*7c478bd9Sstevel@tonic-gate 		/*
1418*7c478bd9Sstevel@tonic-gate 		 * We just found our process, so if we're only rebinding a
1419*7c478bd9Sstevel@tonic-gate 		 * single process then get out of this loop.
1420*7c478bd9Sstevel@tonic-gate 		 */
1421*7c478bd9Sstevel@tonic-gate 		if (idtype == P_PID)
1422*7c478bd9Sstevel@tonic-gate 			break;
1423*7c478bd9Sstevel@tonic-gate 	}
1424*7c478bd9Sstevel@tonic-gate 	*pp = NULL;	/* cap off the end of the array */
1425*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
1426*7c478bd9Sstevel@tonic-gate 
1427*7c478bd9Sstevel@tonic-gate 	/*
1428*7c478bd9Sstevel@tonic-gate 	 * Wait for relevant processes to stop before they try to enter the
1429*7c478bd9Sstevel@tonic-gate 	 * barrier or at the exit from the barrier.  Make sure that we do
1430*7c478bd9Sstevel@tonic-gate 	 * not get stopped here while we're holding pool_lock.  If we were
1431*7c478bd9Sstevel@tonic-gate 	 * requested to stop, or got a signal then return EAGAIN to let the
1432*7c478bd9Sstevel@tonic-gate 	 * library know that it needs to retry.
1433*7c478bd9Sstevel@tonic-gate 	 */
1434*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_barrier_lock);
1435*7c478bd9Sstevel@tonic-gate 	lwp->lwp_nostop++;
1436*7c478bd9Sstevel@tonic-gate 	while (pool_barrier_count > 0) {
1437*7c478bd9Sstevel@tonic-gate 		(void) cv_wait_sig(&pool_barrier_cv, &pool_barrier_lock);
1438*7c478bd9Sstevel@tonic-gate 		if (pool_barrier_count > 0) {
1439*7c478bd9Sstevel@tonic-gate 			/*
1440*7c478bd9Sstevel@tonic-gate 			 * We either got a signal or were requested to
1441*7c478bd9Sstevel@tonic-gate 			 * stop by /proc.  Bail out with EAGAIN.  If we were
1442*7c478bd9Sstevel@tonic-gate 			 * requested to stop, we'll stop in post_syscall()
1443*7c478bd9Sstevel@tonic-gate 			 * on our way back to userland.
1444*7c478bd9Sstevel@tonic-gate 			 */
1445*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
1446*7c478bd9Sstevel@tonic-gate 			pool_bind_wakeall(procs);
1447*7c478bd9Sstevel@tonic-gate 			lwp->lwp_nostop--;
1448*7c478bd9Sstevel@tonic-gate 			rv = EAGAIN;
1449*7c478bd9Sstevel@tonic-gate 			goto out;
1450*7c478bd9Sstevel@tonic-gate 		}
1451*7c478bd9Sstevel@tonic-gate 	}
1452*7c478bd9Sstevel@tonic-gate 	lwp->lwp_nostop--;
1453*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_barrier_lock);
1454*7c478bd9Sstevel@tonic-gate 
1455*7c478bd9Sstevel@tonic-gate 	if (idtype == P_PID)
1456*7c478bd9Sstevel@tonic-gate 		goto skip;
1457*7c478bd9Sstevel@tonic-gate 
1458*7c478bd9Sstevel@tonic-gate 	/*
1459*7c478bd9Sstevel@tonic-gate 	 * Do another run, and drop processes that were inside the barrier
1460*7c478bd9Sstevel@tonic-gate 	 * in exit(), but when they have dropped to pool_barrier_exit
1461*7c478bd9Sstevel@tonic-gate 	 * they have become of no interest to us.  Pick up child processes that
1462*7c478bd9Sstevel@tonic-gate 	 * were created by fork() but didn't exist during our first scan.
1463*7c478bd9Sstevel@tonic-gate 	 * Their parents are now stopped at pool_barrier_exit in cfork().
1464*7c478bd9Sstevel@tonic-gate 	 */
1465*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
1466*7c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
1467*7c478bd9Sstevel@tonic-gate 		if (p->p_poolflag & PEXITED) {
1468*7c478bd9Sstevel@tonic-gate 			ASSERT(p->p_lwpcnt == 0);
1469*7c478bd9Sstevel@tonic-gate 			pool_bind_wake(p);
1470*7c478bd9Sstevel@tonic-gate 			/* flip w/last non-NULL slot */
1471*7c478bd9Sstevel@tonic-gate 			*pp = procs[procs_count - 1];
1472*7c478bd9Sstevel@tonic-gate 			procs[procs_count - 1] = NULL;
1473*7c478bd9Sstevel@tonic-gate 			procs_count--;
1474*7c478bd9Sstevel@tonic-gate 			pp--;			/* try this slot again */
1475*7c478bd9Sstevel@tonic-gate 			continue;
1476*7c478bd9Sstevel@tonic-gate 		}
1477*7c478bd9Sstevel@tonic-gate 		/*
1478*7c478bd9Sstevel@tonic-gate 		 * Look at the child and check if it should be rebound also.
1479*7c478bd9Sstevel@tonic-gate 		 * We're holding pidlock, so it is safe to reference p_child.
1480*7c478bd9Sstevel@tonic-gate 		 */
1481*7c478bd9Sstevel@tonic-gate 		if ((p = p->p_child) == NULL)
1482*7c478bd9Sstevel@tonic-gate 			continue;
1483*7c478bd9Sstevel@tonic-gate 
1484*7c478bd9Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
1485*7c478bd9Sstevel@tonic-gate 		/*
1486*7c478bd9Sstevel@tonic-gate 		 * Skip processes in local zones if we're not binding
1487*7c478bd9Sstevel@tonic-gate 		 * zones to pools (P_ZONEID).  Skip kernel processes also.
1488*7c478bd9Sstevel@tonic-gate 		 */
1489*7c478bd9Sstevel@tonic-gate 		if ((!INGLOBALZONE(p) && idtype != P_ZONEID) ||
1490*7c478bd9Sstevel@tonic-gate 		    p->p_flag & SSYS) {
1491*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
1492*7c478bd9Sstevel@tonic-gate 			continue;
1493*7c478bd9Sstevel@tonic-gate 		}
1494*7c478bd9Sstevel@tonic-gate 
1495*7c478bd9Sstevel@tonic-gate 		/*
1496*7c478bd9Sstevel@tonic-gate 		 * If the child process has been already created by fork(), has
1497*7c478bd9Sstevel@tonic-gate 		 * not exited, and has not been added to the list already,
1498*7c478bd9Sstevel@tonic-gate 		 * then add it now.  We will hit this process again (since we
1499*7c478bd9Sstevel@tonic-gate 		 * stick it at the end of the procs list) but it will ignored
1500*7c478bd9Sstevel@tonic-gate 		 * because it will have the PBWAIT flag set.
1501*7c478bd9Sstevel@tonic-gate 		 */
1502*7c478bd9Sstevel@tonic-gate 		if (procinset(p, &set) &&
1503*7c478bd9Sstevel@tonic-gate 		    !(p->p_poolflag & PEXITED) &&
1504*7c478bd9Sstevel@tonic-gate 		    !(p->p_poolflag & PBWAIT)) {
1505*7c478bd9Sstevel@tonic-gate 			ASSERT(p->p_child == NULL); /* no child of a child */
1506*7c478bd9Sstevel@tonic-gate 			procs[procs_count] = p;
1507*7c478bd9Sstevel@tonic-gate 			procs[procs_count + 1] = NULL;
1508*7c478bd9Sstevel@tonic-gate 			procs_count++;
1509*7c478bd9Sstevel@tonic-gate 			p->p_poolflag |= PBWAIT;
1510*7c478bd9Sstevel@tonic-gate 		}
1511*7c478bd9Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
1512*7c478bd9Sstevel@tonic-gate 	}
1513*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
1514*7c478bd9Sstevel@tonic-gate skip:
1515*7c478bd9Sstevel@tonic-gate 	/*
1516*7c478bd9Sstevel@tonic-gate 	 * If there's no processes to rebind then return ESRCH, unless
1517*7c478bd9Sstevel@tonic-gate 	 * we're associating a pool with new resource set, destroying it,
1518*7c478bd9Sstevel@tonic-gate 	 * or binding a zone to a pool.
1519*7c478bd9Sstevel@tonic-gate 	 */
1520*7c478bd9Sstevel@tonic-gate 	if (procs_count == 0) {
1521*7c478bd9Sstevel@tonic-gate 		if (idtype == P_POOLID || idtype == P_ZONEID)
1522*7c478bd9Sstevel@tonic-gate 			rv = 0;
1523*7c478bd9Sstevel@tonic-gate 		else
1524*7c478bd9Sstevel@tonic-gate 			rv = ESRCH;
1525*7c478bd9Sstevel@tonic-gate 		goto out;
1526*7c478bd9Sstevel@tonic-gate 	}
1527*7c478bd9Sstevel@tonic-gate 
1528*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1529*7c478bd9Sstevel@tonic-gate 	/*
1530*7c478bd9Sstevel@tonic-gate 	 * All processes in the array should have PBWAIT set, and none should
1531*7c478bd9Sstevel@tonic-gate 	 * be in the critical section.  Even though p_poolflag is protected by
1532*7c478bd9Sstevel@tonic-gate 	 * the p_lock, these assertions should be stable across the dropping of
1533*7c478bd9Sstevel@tonic-gate 	 * p_lock.
1534*7c478bd9Sstevel@tonic-gate 	 */
1535*7c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
1536*7c478bd9Sstevel@tonic-gate 		ASSERT(p->p_poolflag & PBWAIT);
1537*7c478bd9Sstevel@tonic-gate 		ASSERT(p->p_poolcnt == 0);
1538*7c478bd9Sstevel@tonic-gate 		ASSERT(procinset(p, &set));
1539*7c478bd9Sstevel@tonic-gate 	}
1540*7c478bd9Sstevel@tonic-gate #endif
1541*7c478bd9Sstevel@tonic-gate 
1542*7c478bd9Sstevel@tonic-gate 	/*
1543*7c478bd9Sstevel@tonic-gate 	 * Do the check if processor set rebinding is going to succeed or not.
1544*7c478bd9Sstevel@tonic-gate 	 */
1545*7c478bd9Sstevel@tonic-gate 	if ((flags & POOL_BIND_PSET) &&
1546*7c478bd9Sstevel@tonic-gate 	    (rv = pset_bind_start(procs, pool)) != 0) {
1547*7c478bd9Sstevel@tonic-gate 		pool_bind_wakeall(procs);
1548*7c478bd9Sstevel@tonic-gate 		goto out;
1549*7c478bd9Sstevel@tonic-gate 	}
1550*7c478bd9Sstevel@tonic-gate 
1551*7c478bd9Sstevel@tonic-gate 	/*
1552*7c478bd9Sstevel@tonic-gate 	 * At this point, all bind operations should succeed.
1553*7c478bd9Sstevel@tonic-gate 	 */
1554*7c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
1555*7c478bd9Sstevel@tonic-gate 		if (flags & POOL_BIND_PSET) {
1556*7c478bd9Sstevel@tonic-gate 			psetid_t psetid = pool->pool_pset->pset_id;
1557*7c478bd9Sstevel@tonic-gate 			void *zonebuf;
1558*7c478bd9Sstevel@tonic-gate 			void *projbuf;
1559*7c478bd9Sstevel@tonic-gate 
1560*7c478bd9Sstevel@tonic-gate 			/*
1561*7c478bd9Sstevel@tonic-gate 			 * Pre-allocate one buffer for FSS (per-project
1562*7c478bd9Sstevel@tonic-gate 			 * buffer for a new pset) in case if this is the
1563*7c478bd9Sstevel@tonic-gate 			 * first thread from its current project getting
1564*7c478bd9Sstevel@tonic-gate 			 * bound to this processor set.
1565*7c478bd9Sstevel@tonic-gate 			 */
1566*7c478bd9Sstevel@tonic-gate 			projbuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_PROJ);
1567*7c478bd9Sstevel@tonic-gate 			zonebuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_ZONE);
1568*7c478bd9Sstevel@tonic-gate 
1569*7c478bd9Sstevel@tonic-gate 			mutex_enter(&pidlock);
1570*7c478bd9Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
1571*7c478bd9Sstevel@tonic-gate 			pool_pset_bind(p, psetid, projbuf, zonebuf);
1572*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
1573*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pidlock);
1574*7c478bd9Sstevel@tonic-gate 			/*
1575*7c478bd9Sstevel@tonic-gate 			 * Free buffers pre-allocated above if it
1576*7c478bd9Sstevel@tonic-gate 			 * wasn't actually used.
1577*7c478bd9Sstevel@tonic-gate 			 */
1578*7c478bd9Sstevel@tonic-gate 			fss_freebuf(projbuf, FSS_ALLOC_PROJ);
1579*7c478bd9Sstevel@tonic-gate 			fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
1580*7c478bd9Sstevel@tonic-gate 		}
1581*7c478bd9Sstevel@tonic-gate 		/*
1582*7c478bd9Sstevel@tonic-gate 		 * Now let's change the scheduling class of this
1583*7c478bd9Sstevel@tonic-gate 		 * process if our target pool has it defined.
1584*7c478bd9Sstevel@tonic-gate 		 */
1585*7c478bd9Sstevel@tonic-gate 		if (cid != POOL_CLASS_UNSET)
1586*7c478bd9Sstevel@tonic-gate 			pool_change_class(p, cid);
1587*7c478bd9Sstevel@tonic-gate 
1588*7c478bd9Sstevel@tonic-gate 		/*
1589*7c478bd9Sstevel@tonic-gate 		 * It is safe to reference p_pool here without holding
1590*7c478bd9Sstevel@tonic-gate 		 * p_lock because it cannot change underneath of us.
1591*7c478bd9Sstevel@tonic-gate 		 * We're holding pool_lock here, so nobody else can be
1592*7c478bd9Sstevel@tonic-gate 		 * moving this process between pools.  If process "p"
1593*7c478bd9Sstevel@tonic-gate 		 * would be exiting, we're guaranteed that it would be blocked
1594*7c478bd9Sstevel@tonic-gate 		 * at pool_barrier_enter() in exit().  Otherwise, it would've
1595*7c478bd9Sstevel@tonic-gate 		 * been skipped by one of our scans of the practive list
1596*7c478bd9Sstevel@tonic-gate 		 * as a process with PEXITED flag set.
1597*7c478bd9Sstevel@tonic-gate 		 */
1598*7c478bd9Sstevel@tonic-gate 		if (p->p_pool != pool) {
1599*7c478bd9Sstevel@tonic-gate 			ASSERT(p->p_pool->pool_ref > 0);
1600*7c478bd9Sstevel@tonic-gate 			atomic_add_32(&p->p_pool->pool_ref, -1);
1601*7c478bd9Sstevel@tonic-gate 			p->p_pool = pool;
1602*7c478bd9Sstevel@tonic-gate 			atomic_add_32(&p->p_pool->pool_ref, 1);
1603*7c478bd9Sstevel@tonic-gate 		}
1604*7c478bd9Sstevel@tonic-gate 		/*
1605*7c478bd9Sstevel@tonic-gate 		 * Okay, we've tortured this guy enough.
1606*7c478bd9Sstevel@tonic-gate 		 * Let this poor process go now.
1607*7c478bd9Sstevel@tonic-gate 		 */
1608*7c478bd9Sstevel@tonic-gate 		pool_bind_wake(p);
1609*7c478bd9Sstevel@tonic-gate 	}
1610*7c478bd9Sstevel@tonic-gate 	if (flags & POOL_BIND_PSET)
1611*7c478bd9Sstevel@tonic-gate 		pset_bind_finish();
1612*7c478bd9Sstevel@tonic-gate 
1613*7c478bd9Sstevel@tonic-gate out:	switch (idtype) {
1614*7c478bd9Sstevel@tonic-gate 	case P_PROJID:
1615*7c478bd9Sstevel@tonic-gate 		ASSERT(kpj != NULL);
1616*7c478bd9Sstevel@tonic-gate 		mutex_exit(&kpj->kpj_poolbind);
1617*7c478bd9Sstevel@tonic-gate 		project_rele(kpj);
1618*7c478bd9Sstevel@tonic-gate 		break;
1619*7c478bd9Sstevel@tonic-gate 	case P_ZONEID:
1620*7c478bd9Sstevel@tonic-gate 		if (rv == 0) {
1621*7c478bd9Sstevel@tonic-gate 			mutex_enter(&cpu_lock);
1622*7c478bd9Sstevel@tonic-gate 			zone_pool_set(zone, pool);
1623*7c478bd9Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
1624*7c478bd9Sstevel@tonic-gate 		}
1625*7c478bd9Sstevel@tonic-gate 		zone->zone_pool_mod = gethrtime();
1626*7c478bd9Sstevel@tonic-gate 		zone_rele(zone);
1627*7c478bd9Sstevel@tonic-gate 		break;
1628*7c478bd9Sstevel@tonic-gate 	}
1629*7c478bd9Sstevel@tonic-gate 
1630*7c478bd9Sstevel@tonic-gate 	kmem_free(procs, procs_size * sizeof (proc_t *));
1631*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_barrier_count == 0);
1632*7c478bd9Sstevel@tonic-gate 	return (rv);
1633*7c478bd9Sstevel@tonic-gate }
1634