xref: /titanic_50/usr/src/uts/common/os/pool.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
522942fabSgm149974  * Common Development and Distribution License (the "License").
622942fabSgm149974  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21d4204c85Sraf 
227c478bd9Sstevel@tonic-gate /*
233b132919SMichael Lim  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/pool.h>
277c478bd9Sstevel@tonic-gate #include <sys/pool_impl.h>
287c478bd9Sstevel@tonic-gate #include <sys/pool_pset.h>
297c478bd9Sstevel@tonic-gate #include <sys/id_space.h>
307c478bd9Sstevel@tonic-gate #include <sys/mutex.h>
317c478bd9Sstevel@tonic-gate #include <sys/nvpair.h>
327c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
337c478bd9Sstevel@tonic-gate #include <sys/errno.h>
347c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
357c478bd9Sstevel@tonic-gate #include <sys/systm.h>
367c478bd9Sstevel@tonic-gate #include <sys/proc.h>
377c478bd9Sstevel@tonic-gate #include <sys/fss.h>
387c478bd9Sstevel@tonic-gate #include <sys/class.h>
397c478bd9Sstevel@tonic-gate #include <sys/exacct.h>
407c478bd9Sstevel@tonic-gate #include <sys/utsname.h>
417c478bd9Sstevel@tonic-gate #include <sys/procset.h>
427c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
437c478bd9Sstevel@tonic-gate #include <sys/zone.h>
447c478bd9Sstevel@tonic-gate #include <sys/policy.h>
45d4204c85Sraf #include <sys/schedctl.h>
460dc2366fSVenugopal Iyer #include <sys/taskq.h>
477c478bd9Sstevel@tonic-gate 
487c478bd9Sstevel@tonic-gate /*
497c478bd9Sstevel@tonic-gate  * RESOURCE POOLS
507c478bd9Sstevel@tonic-gate  *
 * The resource pools facility brings together process-bindable resources into
527c478bd9Sstevel@tonic-gate  * a common abstraction called a pool. Processor sets and other entities can
537c478bd9Sstevel@tonic-gate  * be configured, grouped, and labelled such that workload components can be
547c478bd9Sstevel@tonic-gate  * associated with a subset of a system's total resources.
557c478bd9Sstevel@tonic-gate  *
567c478bd9Sstevel@tonic-gate  * When disabled, the pools facility is "invisible".  All processes belong
577c478bd9Sstevel@tonic-gate  * to the same pool (pool_default), and processor sets can be managed through
587c478bd9Sstevel@tonic-gate  * the old pset() system call.  When enabled, processor sets can only be
597c478bd9Sstevel@tonic-gate  * managed via the pools facility.  New pools can be created and associated
607c478bd9Sstevel@tonic-gate  * with processor sets.  Processes can be bound to pools which have non-empty
617c478bd9Sstevel@tonic-gate  * resource sets.
627c478bd9Sstevel@tonic-gate  *
637c478bd9Sstevel@tonic-gate  * Locking: pool_lock() protects global pools state and must be called
647c478bd9Sstevel@tonic-gate  * before modifying the configuration, or when taking a snapshot of the
657c478bd9Sstevel@tonic-gate  * configuration.  If pool_lock_intr() is used, the operation may be
667c478bd9Sstevel@tonic-gate  * interrupted by a signal or a request.
677c478bd9Sstevel@tonic-gate  *
 * To prevent processes from being rebound between pools while they are
 * in the middle of an operation which affects resource set bindings, such
 * operations must be surrounded by calls to pool_barrier_enter() and
 * pool_barrier_exit().  This mechanism guarantees that such processes will
 * be stopped either at the beginning or at the end of the barrier so that
 * the rebind operation can atomically bind the process and its threads
 * to new resource sets, and then let the process run again.
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  * Lock ordering with respect to other locks is as follows:
777c478bd9Sstevel@tonic-gate  *
787c478bd9Sstevel@tonic-gate  * 	pool_lock() -> cpu_lock -> pidlock -> p_lock -> pool_barrier_lock
797c478bd9Sstevel@tonic-gate  *
807c478bd9Sstevel@tonic-gate  * Most static and global variables defined in this file are protected
817c478bd9Sstevel@tonic-gate  * by calling pool_lock().
827c478bd9Sstevel@tonic-gate  *
837c478bd9Sstevel@tonic-gate  * The operation that binds tasks and projects to pools is atomic.  That is,
847c478bd9Sstevel@tonic-gate  * either all processes in a given task or a project will be bound to a
857c478bd9Sstevel@tonic-gate  * new pool, or (in case of an error) they will be all left bound to the
867c478bd9Sstevel@tonic-gate  * old pool. Processes in a given task or a given project can only be bound to
877c478bd9Sstevel@tonic-gate  * different pools if they were rebound individually one by one as single
887c478bd9Sstevel@tonic-gate  * processes.  Threads or LWPs of the same process do not have pool bindings,
897c478bd9Sstevel@tonic-gate  * and are bound to the same resource sets associated with the resource pool
907c478bd9Sstevel@tonic-gate  * of that process.
917c478bd9Sstevel@tonic-gate  *
927c478bd9Sstevel@tonic-gate  * The following picture shows one possible pool configuration with three
937c478bd9Sstevel@tonic-gate  * pools and three processor sets.  Note that processor set "foo" is not
947c478bd9Sstevel@tonic-gate  * associated with any pools and therefore cannot have any processes
957c478bd9Sstevel@tonic-gate  * bound to it.  Two pools (default and foo) are associated with the
967c478bd9Sstevel@tonic-gate  * same processor set (default).  Also, note that processes in Task 2
977c478bd9Sstevel@tonic-gate  * are bound to different pools.
987c478bd9Sstevel@tonic-gate  *
997c478bd9Sstevel@tonic-gate  *
1007c478bd9Sstevel@tonic-gate  *							       Processor Sets
1017c478bd9Sstevel@tonic-gate  *								+---------+
1027c478bd9Sstevel@tonic-gate  *		       +--------------+========================>| default |
1037c478bd9Sstevel@tonic-gate  *		      a|	      |				+---------+
1047c478bd9Sstevel@tonic-gate  *		      s|	      |				    ||
1057c478bd9Sstevel@tonic-gate  *		      s|	      |				+---------+
1067c478bd9Sstevel@tonic-gate  *		      o|	      |				|   foo   |
1077c478bd9Sstevel@tonic-gate  *		      c|	      |				+---------+
1087c478bd9Sstevel@tonic-gate  *		      i|	      |				    ||
1097c478bd9Sstevel@tonic-gate  *		      a|	      |				+---------+
1107c478bd9Sstevel@tonic-gate  *		      t|	      |			+------>|   bar   |
1117c478bd9Sstevel@tonic-gate  *		      e|	      |			|	+---------+
1127c478bd9Sstevel@tonic-gate  *                    d|              |                 |
1137c478bd9Sstevel@tonic-gate  *                     |              |                 |
1147c478bd9Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
1157c478bd9Sstevel@tonic-gate  *     Pools   | default |======|   foo   |======|   bar   |
1167c478bd9Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
1177c478bd9Sstevel@tonic-gate  *	           @  @            @              @ @   @
1187c478bd9Sstevel@tonic-gate  *                b|  |            |              | |   |
1197c478bd9Sstevel@tonic-gate  *                o|  |            |              | |   |
1207c478bd9Sstevel@tonic-gate  *                u|  +-----+      |      +-------+ |   +---+
1217c478bd9Sstevel@tonic-gate  *                n|        |      |      |         |       |
1227c478bd9Sstevel@tonic-gate  *            ....d|........|......|......|.........|.......|....
1237c478bd9Sstevel@tonic-gate  *            :    |   ::   |      |      |    ::   |       |   :
1247c478bd9Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
1257c478bd9Sstevel@tonic-gate  *  Processes :  | p | :: | p |  | p |  | p |  :: | p |...| p | :
1267c478bd9Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
1277c478bd9Sstevel@tonic-gate  *            :........::......................::...............:
1287c478bd9Sstevel@tonic-gate  *              Task 1            Task 2              Task N
1297c478bd9Sstevel@tonic-gate  *                 |                 |                  |
1307c478bd9Sstevel@tonic-gate  *                 |                 |                  |
1317c478bd9Sstevel@tonic-gate  *                 |  +-----------+  |             +-----------+
1327c478bd9Sstevel@tonic-gate  *                 +--| Project 1 |--+             | Project N |
1337c478bd9Sstevel@tonic-gate  *                    +-----------+                +-----------+
1347c478bd9Sstevel@tonic-gate  *
1357c478bd9Sstevel@tonic-gate  * This is just an illustration of relationships between processes, tasks,
1367c478bd9Sstevel@tonic-gate  * projects, pools, and processor sets. New types of resource sets will be
1377c478bd9Sstevel@tonic-gate  * added in the future.
1387c478bd9Sstevel@tonic-gate  */
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate pool_t		*pool_default;	/* default pool which always exists */
1417c478bd9Sstevel@tonic-gate int		pool_count;	/* number of pools created on this system */
1427c478bd9Sstevel@tonic-gate int		pool_state;	/* pools state -- enabled/disabled */
1437c478bd9Sstevel@tonic-gate void		*pool_buf;	/* pre-commit snapshot of the pools state */
1447c478bd9Sstevel@tonic-gate size_t		pool_bufsz;	/* size of pool_buf */
1457c478bd9Sstevel@tonic-gate static hrtime_t	pool_pool_mod;	/* last modification time for pools */
1467c478bd9Sstevel@tonic-gate static hrtime_t	pool_sys_mod;	/* last modification time for system */
1477c478bd9Sstevel@tonic-gate static nvlist_t	*pool_sys_prop;	/* system properties */
1487c478bd9Sstevel@tonic-gate static id_space_t *pool_ids;	/* pool ID space */
1497c478bd9Sstevel@tonic-gate static list_t	pool_list;	/* doubly-linked list of pools */
1507c478bd9Sstevel@tonic-gate static kmutex_t		pool_mutex;		/* protects pool_busy_* */
1517c478bd9Sstevel@tonic-gate static kcondvar_t	pool_busy_cv;		/* waiting for "pool_lock" */
1527c478bd9Sstevel@tonic-gate static kthread_t	*pool_busy_thread;	/* thread holding "pool_lock" */
1537c478bd9Sstevel@tonic-gate static kmutex_t		pool_barrier_lock;	/* synch. with pool_barrier_* */
1547c478bd9Sstevel@tonic-gate static kcondvar_t	pool_barrier_cv;	/* synch. with pool_barrier_* */
1557c478bd9Sstevel@tonic-gate static int		pool_barrier_count;	/* synch. with pool_barrier_* */
1560dc2366fSVenugopal Iyer static list_t		pool_event_cb_list;	/* pool event callbacks */
1570dc2366fSVenugopal Iyer static boolean_t	pool_event_cb_init = B_FALSE;
1580dc2366fSVenugopal Iyer static kmutex_t		pool_event_cb_lock;
1590dc2366fSVenugopal Iyer static taskq_t		*pool_event_cb_taskq = NULL;
1600dc2366fSVenugopal Iyer 
1610dc2366fSVenugopal Iyer void pool_event_dispatch(pool_event_t, poolid_t);
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate /*
1647c478bd9Sstevel@tonic-gate  * Boot-time pool initialization.
1657c478bd9Sstevel@tonic-gate  */
1667c478bd9Sstevel@tonic-gate void
pool_init(void)1677c478bd9Sstevel@tonic-gate pool_init(void)
1687c478bd9Sstevel@tonic-gate {
1697c478bd9Sstevel@tonic-gate 	pool_ids = id_space_create("pool_ids", POOL_DEFAULT + 1, POOL_MAXID);
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate 	/*
1727c478bd9Sstevel@tonic-gate 	 * Initialize default pool.
1737c478bd9Sstevel@tonic-gate 	 */
1747c478bd9Sstevel@tonic-gate 	pool_default = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
1757c478bd9Sstevel@tonic-gate 	pool_default->pool_id = POOL_DEFAULT;
1767c478bd9Sstevel@tonic-gate 	list_create(&pool_list, sizeof (pool_t), offsetof(pool_t, pool_link));
1777c478bd9Sstevel@tonic-gate 	list_insert_head(&pool_list, pool_default);
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	/*
1807c478bd9Sstevel@tonic-gate 	 * Initialize plugins for resource sets.
1817c478bd9Sstevel@tonic-gate 	 */
1827c478bd9Sstevel@tonic-gate 	pool_pset_init();
1837c478bd9Sstevel@tonic-gate 	pool_count = 1;
1847c478bd9Sstevel@tonic-gate 	p0.p_pool = pool_default;
1857c478bd9Sstevel@tonic-gate 	global_zone->zone_pool = pool_default;
1867c478bd9Sstevel@tonic-gate 	pool_default->pool_ref = 1;
1877c478bd9Sstevel@tonic-gate }
1887c478bd9Sstevel@tonic-gate 
1897c478bd9Sstevel@tonic-gate /*
1907c478bd9Sstevel@tonic-gate  * Synchronization routines.
1917c478bd9Sstevel@tonic-gate  *
1927c478bd9Sstevel@tonic-gate  * pool_lock is only called from syscall-level routines (processor_bind(),
1937c478bd9Sstevel@tonic-gate  * pset_*(), and /dev/pool ioctls).  The pool "lock" may be held for long
1947c478bd9Sstevel@tonic-gate  * periods of time, including across sleeping operations, so we allow its
1957c478bd9Sstevel@tonic-gate  * acquisition to be interruptible.
1967c478bd9Sstevel@tonic-gate  *
1977c478bd9Sstevel@tonic-gate  * The current thread that owns the "lock" is stored in the variable
1987c478bd9Sstevel@tonic-gate  * pool_busy_thread, both to let pool_lock_held() work and to aid debugging.
1997c478bd9Sstevel@tonic-gate  */
2007c478bd9Sstevel@tonic-gate void
pool_lock(void)2017c478bd9Sstevel@tonic-gate pool_lock(void)
2027c478bd9Sstevel@tonic-gate {
2037c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
204e76e762eSacruz 	ASSERT(!pool_lock_held());
2057c478bd9Sstevel@tonic-gate 	while (pool_busy_thread != NULL)
2067c478bd9Sstevel@tonic-gate 		cv_wait(&pool_busy_cv, &pool_mutex);
2077c478bd9Sstevel@tonic-gate 	pool_busy_thread = curthread;
2087c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2097c478bd9Sstevel@tonic-gate }
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate int
pool_lock_intr(void)2127c478bd9Sstevel@tonic-gate pool_lock_intr(void)
2137c478bd9Sstevel@tonic-gate {
2147c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
215e76e762eSacruz 	ASSERT(!pool_lock_held());
2167c478bd9Sstevel@tonic-gate 	while (pool_busy_thread != NULL) {
2177c478bd9Sstevel@tonic-gate 		if (cv_wait_sig(&pool_busy_cv, &pool_mutex) == 0) {
2187c478bd9Sstevel@tonic-gate 			cv_signal(&pool_busy_cv);
2197c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_mutex);
2207c478bd9Sstevel@tonic-gate 			return (1);
2217c478bd9Sstevel@tonic-gate 		}
2227c478bd9Sstevel@tonic-gate 	}
2237c478bd9Sstevel@tonic-gate 	pool_busy_thread = curthread;
2247c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2257c478bd9Sstevel@tonic-gate 	return (0);
2267c478bd9Sstevel@tonic-gate }
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate int
pool_lock_held(void)2297c478bd9Sstevel@tonic-gate pool_lock_held(void)
2307c478bd9Sstevel@tonic-gate {
2317c478bd9Sstevel@tonic-gate 	return (pool_busy_thread == curthread);
2327c478bd9Sstevel@tonic-gate }
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate void
pool_unlock(void)2357c478bd9Sstevel@tonic-gate pool_unlock(void)
2367c478bd9Sstevel@tonic-gate {
2377c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
238e76e762eSacruz 	ASSERT(pool_lock_held());
2397c478bd9Sstevel@tonic-gate 	pool_busy_thread = NULL;
2407c478bd9Sstevel@tonic-gate 	cv_signal(&pool_busy_cv);
2417c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2427c478bd9Sstevel@tonic-gate }
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate /*
2457c478bd9Sstevel@tonic-gate  * Routines allowing fork(), exec(), exit(), and lwp_create() to synchronize
2467c478bd9Sstevel@tonic-gate  * with pool_do_bind().
2477c478bd9Sstevel@tonic-gate  *
2487c478bd9Sstevel@tonic-gate  * Calls to pool_barrier_enter() and pool_barrier_exit() must bracket all
2497c478bd9Sstevel@tonic-gate  * operations which modify pool or pset associations.  They can be called
2507c478bd9Sstevel@tonic-gate  * while the process is multi-threaded.  In the common case, when current
2517c478bd9Sstevel@tonic-gate  * process is not being rebound (PBWAIT flag is not set), these functions
2527c478bd9Sstevel@tonic-gate  * will be just incrementing and decrementing reference counts.
2537c478bd9Sstevel@tonic-gate  */
2547c478bd9Sstevel@tonic-gate void
pool_barrier_enter(void)2557c478bd9Sstevel@tonic-gate pool_barrier_enter(void)
2567c478bd9Sstevel@tonic-gate {
2577c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
2607c478bd9Sstevel@tonic-gate 	while (p->p_poolflag & PBWAIT)
2617c478bd9Sstevel@tonic-gate 		cv_wait(&p->p_poolcv, &p->p_lock);
2627c478bd9Sstevel@tonic-gate 	p->p_poolcnt++;
2637c478bd9Sstevel@tonic-gate }
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate void
pool_barrier_exit(void)2667c478bd9Sstevel@tonic-gate pool_barrier_exit(void)
2677c478bd9Sstevel@tonic-gate {
2687c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
2717c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolcnt > 0);
2727c478bd9Sstevel@tonic-gate 	p->p_poolcnt--;
2737c478bd9Sstevel@tonic-gate 	if (p->p_poolflag & PBWAIT) {
2747c478bd9Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
2757c478bd9Sstevel@tonic-gate 		ASSERT(pool_barrier_count > 0);
2767c478bd9Sstevel@tonic-gate 		pool_barrier_count--;
2777c478bd9Sstevel@tonic-gate 		if (pool_barrier_count == 0)
2787c478bd9Sstevel@tonic-gate 			cv_signal(&pool_barrier_cv);
2797c478bd9Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
2807c478bd9Sstevel@tonic-gate 		while (p->p_poolflag & PBWAIT)
2817c478bd9Sstevel@tonic-gate 			cv_wait(&p->p_poolcv, &p->p_lock);
2827c478bd9Sstevel@tonic-gate 	}
2837c478bd9Sstevel@tonic-gate }
2847c478bd9Sstevel@tonic-gate 
2857c478bd9Sstevel@tonic-gate /*
2867c478bd9Sstevel@tonic-gate  * Enable pools facility.
2877c478bd9Sstevel@tonic-gate  */
2887c478bd9Sstevel@tonic-gate static int
pool_enable(void)2897c478bd9Sstevel@tonic-gate pool_enable(void)
2907c478bd9Sstevel@tonic-gate {
2917c478bd9Sstevel@tonic-gate 	int ret;
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
2947c478bd9Sstevel@tonic-gate 	ASSERT(pool_count == 1);
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	ret = pool_pset_enable();
2977c478bd9Sstevel@tonic-gate 	if (ret != 0)
2987c478bd9Sstevel@tonic-gate 		return (ret);
2997c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_sys_prop, NV_UNIQUE_NAME, KM_SLEEP);
3007c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.name",
30122942fabSgm149974 	    "default");
3027c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.comment", "");
3037c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_sys_prop, "system.version", 1);
3047c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_sys_prop, "system.bind-default", 1);
3050209230bSgjelinek 	(void) nvlist_add_string(pool_sys_prop, "system.poold.objectives",
3060209230bSgjelinek 	    "wt-load");
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_default->pool_props,
3097c478bd9Sstevel@tonic-gate 	    NV_UNIQUE_NAME, KM_SLEEP);
3107c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props,
3117c478bd9Sstevel@tonic-gate 	    "pool.name", "pool_default");
3127c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props, "pool.comment", "");
3137c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.default", 1);
3147c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.active", 1);
3157c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props,
3167c478bd9Sstevel@tonic-gate 	    "pool.importance", 1);
3177c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props, "pool.sys_id",
3187c478bd9Sstevel@tonic-gate 	    pool_default->pool_id);
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	pool_sys_mod = pool_pool_mod = gethrtime();
3217c478bd9Sstevel@tonic-gate 
3227c478bd9Sstevel@tonic-gate 	return (ret);
3237c478bd9Sstevel@tonic-gate }
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate /*
3267c478bd9Sstevel@tonic-gate  * Disable pools facility.
3277c478bd9Sstevel@tonic-gate  */
3287c478bd9Sstevel@tonic-gate static int
pool_disable(void)3297c478bd9Sstevel@tonic-gate pool_disable(void)
3307c478bd9Sstevel@tonic-gate {
3317c478bd9Sstevel@tonic-gate 	int ret;
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 	if (pool_count > 1)	/* must destroy all pools first */
3367c478bd9Sstevel@tonic-gate 		return (EBUSY);
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 	ret = pool_pset_disable();
3397c478bd9Sstevel@tonic-gate 	if (ret != 0)
3407c478bd9Sstevel@tonic-gate 		return (ret);
3417c478bd9Sstevel@tonic-gate 	if (pool_sys_prop != NULL) {
3427c478bd9Sstevel@tonic-gate 		nvlist_free(pool_sys_prop);
3437c478bd9Sstevel@tonic-gate 		pool_sys_prop = NULL;
3447c478bd9Sstevel@tonic-gate 	}
3457c478bd9Sstevel@tonic-gate 	if (pool_default->pool_props != NULL) {
3467c478bd9Sstevel@tonic-gate 		nvlist_free(pool_default->pool_props);
3477c478bd9Sstevel@tonic-gate 		pool_default->pool_props = NULL;
3487c478bd9Sstevel@tonic-gate 	}
3497c478bd9Sstevel@tonic-gate 	return (0);
3507c478bd9Sstevel@tonic-gate }
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate pool_t *
pool_lookup_pool_by_name(char * name)3537c478bd9Sstevel@tonic-gate pool_lookup_pool_by_name(char *name)
3547c478bd9Sstevel@tonic-gate {
3557c478bd9Sstevel@tonic-gate 	pool_t *pool = pool_default;
3567c478bd9Sstevel@tonic-gate 	char *p;
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3597c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
3607c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
3617c478bd9Sstevel@tonic-gate 		if (nvlist_lookup_string(pool->pool_props,
3627c478bd9Sstevel@tonic-gate 		    "pool.name", &p) == 0 && strcmp(name, p) == 0)
3637c478bd9Sstevel@tonic-gate 			return (pool);
3647c478bd9Sstevel@tonic-gate 	}
3657c478bd9Sstevel@tonic-gate 	return (NULL);
3667c478bd9Sstevel@tonic-gate }
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate pool_t *
pool_lookup_pool_by_id(poolid_t poolid)3697c478bd9Sstevel@tonic-gate pool_lookup_pool_by_id(poolid_t poolid)
3707c478bd9Sstevel@tonic-gate {
3717c478bd9Sstevel@tonic-gate 	pool_t *pool = pool_default;
3727c478bd9Sstevel@tonic-gate 
3737c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3747c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
3757c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
3767c478bd9Sstevel@tonic-gate 		if (pool->pool_id == poolid)
3777c478bd9Sstevel@tonic-gate 			return (pool);
3787c478bd9Sstevel@tonic-gate 	}
3797c478bd9Sstevel@tonic-gate 	return (NULL);
3807c478bd9Sstevel@tonic-gate }
3817c478bd9Sstevel@tonic-gate 
3820dc2366fSVenugopal Iyer pool_t *
pool_lookup_pool_by_pset(int id)3830dc2366fSVenugopal Iyer pool_lookup_pool_by_pset(int id)
3840dc2366fSVenugopal Iyer {
3850dc2366fSVenugopal Iyer 	pool_t *pool = pool_default;
3860dc2366fSVenugopal Iyer 	psetid_t psetid = (psetid_t)id;
3870dc2366fSVenugopal Iyer 
3880dc2366fSVenugopal Iyer 	ASSERT(pool_lock_held());
3890dc2366fSVenugopal Iyer 	for (pool = list_head(&pool_list); pool != NULL;
3900dc2366fSVenugopal Iyer 	    pool = list_next(&pool_list, pool)) {
3910dc2366fSVenugopal Iyer 		if (pool->pool_pset->pset_id == psetid)
3920dc2366fSVenugopal Iyer 			return (pool);
3930dc2366fSVenugopal Iyer 	}
3940dc2366fSVenugopal Iyer 	return (NULL);
3950dc2366fSVenugopal Iyer }
3960dc2366fSVenugopal Iyer 
3977c478bd9Sstevel@tonic-gate /*
3987c478bd9Sstevel@tonic-gate  * Create new pool, associate it with default resource sets, and give
3997c478bd9Sstevel@tonic-gate  * it a temporary name.
4007c478bd9Sstevel@tonic-gate  */
4017c478bd9Sstevel@tonic-gate static int
pool_pool_create(poolid_t * poolid)4027c478bd9Sstevel@tonic-gate pool_pool_create(poolid_t *poolid)
4037c478bd9Sstevel@tonic-gate {
4047c478bd9Sstevel@tonic-gate 	pool_t *pool;
4057c478bd9Sstevel@tonic-gate 	char pool_name[40];
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate 	pool = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
4107c478bd9Sstevel@tonic-gate 	pool->pool_id = *poolid = id_alloc(pool_ids);
4117c478bd9Sstevel@tonic-gate 	pool->pool_pset = pool_pset_default;
4127c478bd9Sstevel@tonic-gate 	pool_pset_default->pset_npools++;
4137c478bd9Sstevel@tonic-gate 	list_insert_tail(&pool_list, pool);
4147c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&pool->pool_props, NV_UNIQUE_NAME, KM_SLEEP);
4157c478bd9Sstevel@tonic-gate 	(void) nvlist_add_int64(pool->pool_props, "pool.sys_id", pool->pool_id);
4167c478bd9Sstevel@tonic-gate 	(void) nvlist_add_byte(pool->pool_props, "pool.default", 0);
4177c478bd9Sstevel@tonic-gate 	pool_pool_mod = gethrtime();
4187c478bd9Sstevel@tonic-gate 	(void) snprintf(pool_name, sizeof (pool_name), "pool_%lld",
4197c478bd9Sstevel@tonic-gate 	    pool_pool_mod);
4207c478bd9Sstevel@tonic-gate 	(void) nvlist_add_string(pool->pool_props, "pool.name", pool_name);
4217c478bd9Sstevel@tonic-gate 	pool_count++;
4227c478bd9Sstevel@tonic-gate 	return (0);
4237c478bd9Sstevel@tonic-gate }
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate struct destroy_zone_arg {
4267c478bd9Sstevel@tonic-gate 	pool_t *old;
4277c478bd9Sstevel@tonic-gate 	pool_t *new;
4287c478bd9Sstevel@tonic-gate };
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate /*
4317c478bd9Sstevel@tonic-gate  * Update pool pointers for zones that are currently bound to pool "old"
4327c478bd9Sstevel@tonic-gate  * to be bound to pool "new".
4337c478bd9Sstevel@tonic-gate  */
4347c478bd9Sstevel@tonic-gate static int
pool_destroy_zone_cb(zone_t * zone,void * arg)4357c478bd9Sstevel@tonic-gate pool_destroy_zone_cb(zone_t *zone, void *arg)
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	struct destroy_zone_arg *dza = arg;
4387c478bd9Sstevel@tonic-gate 
4397c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4407c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	if (zone_pool_get(zone) == dza->old)
4437c478bd9Sstevel@tonic-gate 		zone_pool_set(zone, dza->new);
4447c478bd9Sstevel@tonic-gate 	return (0);
4457c478bd9Sstevel@tonic-gate }
4467c478bd9Sstevel@tonic-gate 
4477c478bd9Sstevel@tonic-gate /*
4487c478bd9Sstevel@tonic-gate  * Destroy specified pool, and rebind all processes in it
4497c478bd9Sstevel@tonic-gate  * to the default pool.
4507c478bd9Sstevel@tonic-gate  */
4517c478bd9Sstevel@tonic-gate static int
pool_pool_destroy(poolid_t poolid)4527c478bd9Sstevel@tonic-gate pool_pool_destroy(poolid_t poolid)
4537c478bd9Sstevel@tonic-gate {
4547c478bd9Sstevel@tonic-gate 	pool_t *pool;
4557c478bd9Sstevel@tonic-gate 	int ret;
4567c478bd9Sstevel@tonic-gate 
4577c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate 	if (poolid == POOL_DEFAULT)
4607c478bd9Sstevel@tonic-gate 		return (EINVAL);
4617c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
4627c478bd9Sstevel@tonic-gate 		return (ESRCH);
4637c478bd9Sstevel@tonic-gate 	ret = pool_do_bind(pool_default, P_POOLID, poolid, POOL_BIND_ALL);
4647c478bd9Sstevel@tonic-gate 	if (ret == 0) {
4657c478bd9Sstevel@tonic-gate 		struct destroy_zone_arg dzarg;
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 		dzarg.old = pool;
4687c478bd9Sstevel@tonic-gate 		dzarg.new = pool_default;
4697c478bd9Sstevel@tonic-gate 		mutex_enter(&cpu_lock);
4707c478bd9Sstevel@tonic-gate 		ret = zone_walk(pool_destroy_zone_cb, &dzarg);
4717c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
4727c478bd9Sstevel@tonic-gate 		ASSERT(ret == 0);
4737c478bd9Sstevel@tonic-gate 		ASSERT(pool->pool_ref == 0);
4747c478bd9Sstevel@tonic-gate 		(void) nvlist_free(pool->pool_props);
4757c478bd9Sstevel@tonic-gate 		id_free(pool_ids, pool->pool_id);
4767c478bd9Sstevel@tonic-gate 		pool->pool_pset->pset_npools--;
4777c478bd9Sstevel@tonic-gate 		list_remove(&pool_list, pool);
4787c478bd9Sstevel@tonic-gate 		pool_count--;
4797c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
4807c478bd9Sstevel@tonic-gate 		kmem_free(pool, sizeof (pool_t));
4817c478bd9Sstevel@tonic-gate 	}
4827c478bd9Sstevel@tonic-gate 	return (ret);
4837c478bd9Sstevel@tonic-gate }
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate /*
4867c478bd9Sstevel@tonic-gate  * Create new pool or resource set.
4877c478bd9Sstevel@tonic-gate  */
4887c478bd9Sstevel@tonic-gate int
pool_create(int class,int subclass,id_t * id)4897c478bd9Sstevel@tonic-gate pool_create(int class, int subclass, id_t *id)
4907c478bd9Sstevel@tonic-gate {
4917c478bd9Sstevel@tonic-gate 	int ret;
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4947c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
4957c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
4967c478bd9Sstevel@tonic-gate 	switch (class) {
4977c478bd9Sstevel@tonic-gate 	case PEC_POOL:
4987c478bd9Sstevel@tonic-gate 		ret = pool_pool_create((poolid_t *)id);
4997c478bd9Sstevel@tonic-gate 		break;
5007c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
5017c478bd9Sstevel@tonic-gate 		switch (subclass) {
5027c478bd9Sstevel@tonic-gate 		case PREC_PSET:
5037c478bd9Sstevel@tonic-gate 			ret = pool_pset_create((psetid_t *)id);
5047c478bd9Sstevel@tonic-gate 			break;
5057c478bd9Sstevel@tonic-gate 		default:
5067c478bd9Sstevel@tonic-gate 			ret = EINVAL;
5077c478bd9Sstevel@tonic-gate 		}
5087c478bd9Sstevel@tonic-gate 		break;
5097c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
5107c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
5117c478bd9Sstevel@tonic-gate 		break;
5127c478bd9Sstevel@tonic-gate 	default:
5137c478bd9Sstevel@tonic-gate 		ret = EINVAL;
5147c478bd9Sstevel@tonic-gate 	}
5157c478bd9Sstevel@tonic-gate 	return (ret);
5167c478bd9Sstevel@tonic-gate }
5177c478bd9Sstevel@tonic-gate 
5187c478bd9Sstevel@tonic-gate /*
5197c478bd9Sstevel@tonic-gate  * Destroy an existing pool or resource set.
5207c478bd9Sstevel@tonic-gate  */
5217c478bd9Sstevel@tonic-gate int
pool_destroy(int class,int subclass,id_t id)5227c478bd9Sstevel@tonic-gate pool_destroy(int class, int subclass, id_t id)
5237c478bd9Sstevel@tonic-gate {
5247c478bd9Sstevel@tonic-gate 	int ret;
5257c478bd9Sstevel@tonic-gate 
5267c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5277c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
5287c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
5297c478bd9Sstevel@tonic-gate 	switch (class) {
5307c478bd9Sstevel@tonic-gate 	case PEC_POOL:
5317c478bd9Sstevel@tonic-gate 		ret = pool_pool_destroy((poolid_t)id);
5327c478bd9Sstevel@tonic-gate 		break;
5337c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
5347c478bd9Sstevel@tonic-gate 		switch (subclass) {
5357c478bd9Sstevel@tonic-gate 		case PREC_PSET:
5367c478bd9Sstevel@tonic-gate 			ret = pool_pset_destroy((psetid_t)id);
5377c478bd9Sstevel@tonic-gate 			break;
5387c478bd9Sstevel@tonic-gate 		default:
5397c478bd9Sstevel@tonic-gate 			ret = EINVAL;
5407c478bd9Sstevel@tonic-gate 		}
5417c478bd9Sstevel@tonic-gate 		break;
5427c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
5437c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
5447c478bd9Sstevel@tonic-gate 		break;
5457c478bd9Sstevel@tonic-gate 	default:
5467c478bd9Sstevel@tonic-gate 		ret = EINVAL;
5477c478bd9Sstevel@tonic-gate 	}
5487c478bd9Sstevel@tonic-gate 	return (ret);
5497c478bd9Sstevel@tonic-gate }
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate /*
5527c478bd9Sstevel@tonic-gate  * Enable or disable pools.
5537c478bd9Sstevel@tonic-gate  */
5547c478bd9Sstevel@tonic-gate int
pool_status(int status)5557c478bd9Sstevel@tonic-gate pool_status(int status)
5567c478bd9Sstevel@tonic-gate {
5577c478bd9Sstevel@tonic-gate 	int ret = 0;
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5607c478bd9Sstevel@tonic-gate 
5617c478bd9Sstevel@tonic-gate 	if (pool_state == status)
5627c478bd9Sstevel@tonic-gate 		return (0);
5637c478bd9Sstevel@tonic-gate 	switch (status) {
5647c478bd9Sstevel@tonic-gate 	case POOL_ENABLED:
5657c478bd9Sstevel@tonic-gate 		ret = pool_enable();
5667c478bd9Sstevel@tonic-gate 		if (ret != 0)
5677c478bd9Sstevel@tonic-gate 			return (ret);
5687c478bd9Sstevel@tonic-gate 		pool_state = POOL_ENABLED;
5690dc2366fSVenugopal Iyer 		pool_event_dispatch(POOL_E_ENABLE, NULL);
5707c478bd9Sstevel@tonic-gate 		break;
5717c478bd9Sstevel@tonic-gate 	case POOL_DISABLED:
5727c478bd9Sstevel@tonic-gate 		ret = pool_disable();
5737c478bd9Sstevel@tonic-gate 		if (ret != 0)
5747c478bd9Sstevel@tonic-gate 			return (ret);
5757c478bd9Sstevel@tonic-gate 		pool_state = POOL_DISABLED;
5760dc2366fSVenugopal Iyer 		pool_event_dispatch(POOL_E_DISABLE, NULL);
5777c478bd9Sstevel@tonic-gate 		break;
5787c478bd9Sstevel@tonic-gate 	default:
5797c478bd9Sstevel@tonic-gate 		ret = EINVAL;
5807c478bd9Sstevel@tonic-gate 	}
5817c478bd9Sstevel@tonic-gate 	return (ret);
5827c478bd9Sstevel@tonic-gate }
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate /*
5857c478bd9Sstevel@tonic-gate  * Associate pool with resource set.
5867c478bd9Sstevel@tonic-gate  */
5877c478bd9Sstevel@tonic-gate int
pool_assoc(poolid_t poolid,int idtype,id_t id)5887c478bd9Sstevel@tonic-gate pool_assoc(poolid_t poolid, int idtype, id_t id)
5897c478bd9Sstevel@tonic-gate {
5907c478bd9Sstevel@tonic-gate 	int ret;
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5937c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
5947c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
5957c478bd9Sstevel@tonic-gate 	switch (idtype) {
5967c478bd9Sstevel@tonic-gate 	case PREC_PSET:
5977c478bd9Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, (psetid_t)id);
5980dc2366fSVenugopal Iyer 		if (ret == 0)
5990dc2366fSVenugopal Iyer 			pool_event_dispatch(POOL_E_CHANGE, poolid);
6007c478bd9Sstevel@tonic-gate 		break;
6017c478bd9Sstevel@tonic-gate 	default:
6027c478bd9Sstevel@tonic-gate 		ret = EINVAL;
6037c478bd9Sstevel@tonic-gate 	}
6047c478bd9Sstevel@tonic-gate 	if (ret == 0)
6057c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
6067c478bd9Sstevel@tonic-gate 	return (ret);
6077c478bd9Sstevel@tonic-gate }
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate /*
6107c478bd9Sstevel@tonic-gate  * Disassociate resource set from pool.
6117c478bd9Sstevel@tonic-gate  */
6127c478bd9Sstevel@tonic-gate int
pool_dissoc(poolid_t poolid,int idtype)6137c478bd9Sstevel@tonic-gate pool_dissoc(poolid_t poolid, int idtype)
6147c478bd9Sstevel@tonic-gate {
6157c478bd9Sstevel@tonic-gate 	int ret;
6167c478bd9Sstevel@tonic-gate 
6177c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
6187c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
6197c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
6207c478bd9Sstevel@tonic-gate 	switch (idtype) {
6217c478bd9Sstevel@tonic-gate 	case PREC_PSET:
6227c478bd9Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, PS_NONE);
6230dc2366fSVenugopal Iyer 		if (ret == 0)
6240dc2366fSVenugopal Iyer 			pool_event_dispatch(POOL_E_CHANGE, poolid);
6257c478bd9Sstevel@tonic-gate 		break;
6267c478bd9Sstevel@tonic-gate 	default:
6277c478bd9Sstevel@tonic-gate 		ret = EINVAL;
6287c478bd9Sstevel@tonic-gate 	}
6297c478bd9Sstevel@tonic-gate 	if (ret == 0)
6307c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
6317c478bd9Sstevel@tonic-gate 	return (ret);
6327c478bd9Sstevel@tonic-gate }
6337c478bd9Sstevel@tonic-gate 
6347c478bd9Sstevel@tonic-gate /*
6357c478bd9Sstevel@tonic-gate  * Transfer specified quantity of resources between resource sets.
6367c478bd9Sstevel@tonic-gate  */
6377c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6387c478bd9Sstevel@tonic-gate int
pool_transfer(int type,id_t src,id_t dst,uint64_t qty)6397c478bd9Sstevel@tonic-gate pool_transfer(int type, id_t src, id_t dst, uint64_t qty)
6407c478bd9Sstevel@tonic-gate {
6417c478bd9Sstevel@tonic-gate 	int ret = EINVAL;
6420dc2366fSVenugopal Iyer 
6437c478bd9Sstevel@tonic-gate 	return (ret);
6447c478bd9Sstevel@tonic-gate }
6457c478bd9Sstevel@tonic-gate 
6460dc2366fSVenugopal Iyer static poolid_t
pool_lookup_id_by_pset(int id)6470dc2366fSVenugopal Iyer pool_lookup_id_by_pset(int id)
6480dc2366fSVenugopal Iyer {
6490dc2366fSVenugopal Iyer 	pool_t *pool = pool_default;
6500dc2366fSVenugopal Iyer 	psetid_t psetid = (psetid_t)id;
6510dc2366fSVenugopal Iyer 
6520dc2366fSVenugopal Iyer 	ASSERT(pool_lock_held());
6530dc2366fSVenugopal Iyer 	for (pool = list_head(&pool_list); pool != NULL;
6540dc2366fSVenugopal Iyer 	    pool = list_next(&pool_list, pool)) {
6550dc2366fSVenugopal Iyer 		if (pool->pool_pset->pset_id == psetid)
6560dc2366fSVenugopal Iyer 			return (pool->pool_id);
6570dc2366fSVenugopal Iyer 	}
6580dc2366fSVenugopal Iyer 	return (POOL_INVALID);
6590dc2366fSVenugopal Iyer }
6600dc2366fSVenugopal Iyer 
6617c478bd9Sstevel@tonic-gate /*
6627c478bd9Sstevel@tonic-gate  * Transfer resources specified by their IDs between resource sets.
6637c478bd9Sstevel@tonic-gate  */
6647c478bd9Sstevel@tonic-gate int
pool_xtransfer(int type,id_t src_pset,id_t dst_pset,uint_t size,id_t * ids)6650dc2366fSVenugopal Iyer pool_xtransfer(int type, id_t src_pset, id_t dst_pset, uint_t size, id_t *ids)
6667c478bd9Sstevel@tonic-gate {
6677c478bd9Sstevel@tonic-gate 	int ret;
6680dc2366fSVenugopal Iyer 	poolid_t src_pool, dst_pool;
6697c478bd9Sstevel@tonic-gate 
6707c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
6717c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
6727c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
6737c478bd9Sstevel@tonic-gate 	switch (type) {
6747c478bd9Sstevel@tonic-gate 	case PREC_PSET:
6750dc2366fSVenugopal Iyer 		ret = pool_pset_xtransfer((psetid_t)src_pset,
6760dc2366fSVenugopal Iyer 		    (psetid_t)dst_pset, size, ids);
6773b132919SMichael Lim 		if (ret == 0) {
6783b132919SMichael Lim 			if ((src_pool =  pool_lookup_id_by_pset(src_pset)) !=
6793b132919SMichael Lim 			    POOL_INVALID)
6800dc2366fSVenugopal Iyer 				pool_event_dispatch(POOL_E_CHANGE, src_pool);
6813b132919SMichael Lim 			if ((dst_pool =  pool_lookup_id_by_pset(dst_pset)) !=
6823b132919SMichael Lim 			    POOL_INVALID)
6830dc2366fSVenugopal Iyer 				pool_event_dispatch(POOL_E_CHANGE, dst_pool);
6843b132919SMichael Lim 		}
6857c478bd9Sstevel@tonic-gate 		break;
6867c478bd9Sstevel@tonic-gate 	default:
6877c478bd9Sstevel@tonic-gate 		ret = EINVAL;
6887c478bd9Sstevel@tonic-gate 	}
6897c478bd9Sstevel@tonic-gate 	return (ret);
6907c478bd9Sstevel@tonic-gate }
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate /*
6937c478bd9Sstevel@tonic-gate  * Bind processes to pools.
6947c478bd9Sstevel@tonic-gate  */
6957c478bd9Sstevel@tonic-gate int
pool_bind(poolid_t poolid,idtype_t idtype,id_t id)6967c478bd9Sstevel@tonic-gate pool_bind(poolid_t poolid, idtype_t idtype, id_t id)
6977c478bd9Sstevel@tonic-gate {
6987c478bd9Sstevel@tonic-gate 	pool_t	*pool;
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
7037c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
7047c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
7057c478bd9Sstevel@tonic-gate 		return (ESRCH);
7067c478bd9Sstevel@tonic-gate 
7077c478bd9Sstevel@tonic-gate 	switch (idtype) {
7087c478bd9Sstevel@tonic-gate 	case P_PID:
7097c478bd9Sstevel@tonic-gate 	case P_TASKID:
7107c478bd9Sstevel@tonic-gate 	case P_PROJID:
7117c478bd9Sstevel@tonic-gate 	case P_ZONEID:
7127c478bd9Sstevel@tonic-gate 		break;
7137c478bd9Sstevel@tonic-gate 	default:
7147c478bd9Sstevel@tonic-gate 		return (EINVAL);
7157c478bd9Sstevel@tonic-gate 	}
7167c478bd9Sstevel@tonic-gate 	return (pool_do_bind(pool, idtype, id, POOL_BIND_ALL));
7177c478bd9Sstevel@tonic-gate }
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate /*
7207c478bd9Sstevel@tonic-gate  * Query pool binding of the specifed process.
7217c478bd9Sstevel@tonic-gate  */
7227c478bd9Sstevel@tonic-gate int
pool_query_binding(idtype_t idtype,id_t id,id_t * poolid)7237c478bd9Sstevel@tonic-gate pool_query_binding(idtype_t idtype, id_t id, id_t *poolid)
7247c478bd9Sstevel@tonic-gate {
7257c478bd9Sstevel@tonic-gate 	proc_t *p;
7267c478bd9Sstevel@tonic-gate 
7277c478bd9Sstevel@tonic-gate 	if (idtype != P_PID)
7287c478bd9Sstevel@tonic-gate 		return (ENOTSUP);
7297c478bd9Sstevel@tonic-gate 	if (id == P_MYID)
7307c478bd9Sstevel@tonic-gate 		id = curproc->p_pid;
7317c478bd9Sstevel@tonic-gate 
7327c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7337c478bd9Sstevel@tonic-gate 
7347c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
7357c478bd9Sstevel@tonic-gate 	if ((p = prfind((pid_t)id)) == NULL) {
7367c478bd9Sstevel@tonic-gate 		mutex_exit(&pidlock);
7377c478bd9Sstevel@tonic-gate 		return (ESRCH);
7387c478bd9Sstevel@tonic-gate 	}
7397c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
7407c478bd9Sstevel@tonic-gate 	/*
7417c478bd9Sstevel@tonic-gate 	 * In local zones, lie about pool bindings of processes from
7427c478bd9Sstevel@tonic-gate 	 * the global zone.
7437c478bd9Sstevel@tonic-gate 	 */
7447c478bd9Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc) && INGLOBALZONE(p)) {
7457c478bd9Sstevel@tonic-gate 		pool_t *pool;
7467c478bd9Sstevel@tonic-gate 
7477c478bd9Sstevel@tonic-gate 		pool = zone_pool_get(curproc->p_zone);
7487c478bd9Sstevel@tonic-gate 		*poolid = pool->pool_id;
7497c478bd9Sstevel@tonic-gate 	} else {
7507c478bd9Sstevel@tonic-gate 		*poolid = p->p_pool->pool_id;
7517c478bd9Sstevel@tonic-gate 	}
7527c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
7537c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
7547c478bd9Sstevel@tonic-gate 	return (0);
7557c478bd9Sstevel@tonic-gate }
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate static ea_object_t *
pool_system_pack(void)7587c478bd9Sstevel@tonic-gate pool_system_pack(void)
7597c478bd9Sstevel@tonic-gate {
7607c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
7617c478bd9Sstevel@tonic-gate 	size_t bufsz = 0;
7627c478bd9Sstevel@tonic-gate 	char *buf = NULL;
7637c478bd9Sstevel@tonic-gate 
7647c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7657c478bd9Sstevel@tonic-gate 
7667c478bd9Sstevel@tonic-gate 	eo_system = ea_alloc_group(EXT_GROUP | EXC_LOCAL | EXD_GROUP_SYSTEM);
7677c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_sys_mod, sizeof (hrtime_t),
7687c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_TSTAMP | EXT_UINT64);
7697c478bd9Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
7707c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_system, &pool_pool_mod,
7717c478bd9Sstevel@tonic-gate 		    sizeof (hrtime_t),
7727c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7737c478bd9Sstevel@tonic-gate 	else
7747c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_system,
7757c478bd9Sstevel@tonic-gate 		    &curproc->p_zone->zone_pool_mod,
7767c478bd9Sstevel@tonic-gate 		    sizeof (hrtime_t),
7777c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7787c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_pset_mod, sizeof (hrtime_t),
7797c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_PSET_TSTAMP | EXT_UINT64);
7807c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_cpu_mod, sizeof (hrtime_t),
7817c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_CPU_TSTAMP | EXT_UINT64);
7827c478bd9Sstevel@tonic-gate 	(void) nvlist_pack(pool_sys_prop, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
7837c478bd9Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, buf, bufsz,
7847c478bd9Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_PROP | EXT_RAW);
7857c478bd9Sstevel@tonic-gate 	kmem_free(buf, bufsz);
7867c478bd9Sstevel@tonic-gate 	return (eo_system);
7877c478bd9Sstevel@tonic-gate }
7887c478bd9Sstevel@tonic-gate 
7897c478bd9Sstevel@tonic-gate /*
7907c478bd9Sstevel@tonic-gate  * Pack information about pools and attach it to specified exacct group.
7917c478bd9Sstevel@tonic-gate  */
7927c478bd9Sstevel@tonic-gate static int
pool_pool_pack(ea_object_t * eo_system)7937c478bd9Sstevel@tonic-gate pool_pool_pack(ea_object_t *eo_system)
7947c478bd9Sstevel@tonic-gate {
7957c478bd9Sstevel@tonic-gate 	ea_object_t *eo_pool;
7967c478bd9Sstevel@tonic-gate 	pool_t *pool;
7977c478bd9Sstevel@tonic-gate 	size_t bufsz;
7987c478bd9Sstevel@tonic-gate 	char *buf;
7997c478bd9Sstevel@tonic-gate 	pool_t *myzonepool;
8007c478bd9Sstevel@tonic-gate 
8017c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8027c478bd9Sstevel@tonic-gate 	myzonepool = zone_pool_get(curproc->p_zone);
8037c478bd9Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
8047c478bd9Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
8057c478bd9Sstevel@tonic-gate 		if (!INGLOBALZONE(curproc) && myzonepool != pool)
8067c478bd9Sstevel@tonic-gate 			continue;
8077c478bd9Sstevel@tonic-gate 		bufsz = 0;
8087c478bd9Sstevel@tonic-gate 		buf = NULL;
8097c478bd9Sstevel@tonic-gate 		eo_pool = ea_alloc_group(EXT_GROUP |
8107c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_GROUP_POOL);
8117c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_id, sizeof (id_t),
8127c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_POOLID | EXT_UINT32);
8137c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_pset->pset_id,
8147c478bd9Sstevel@tonic-gate 		    sizeof (id_t), EXC_LOCAL | EXD_POOL_PSETID | EXT_UINT32);
8157c478bd9Sstevel@tonic-gate 		(void) nvlist_pack(pool->pool_props, &buf, &bufsz,
8167c478bd9Sstevel@tonic-gate 		    NV_ENCODE_NATIVE, 0);
8177c478bd9Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, buf, bufsz,
8187c478bd9Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_PROP | EXT_RAW);
8197c478bd9Sstevel@tonic-gate 		kmem_free(buf, bufsz);
8207c478bd9Sstevel@tonic-gate 		(void) ea_attach_to_group(eo_system, eo_pool);
8217c478bd9Sstevel@tonic-gate 	}
8227c478bd9Sstevel@tonic-gate 	return (0);
8237c478bd9Sstevel@tonic-gate }
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate /*
8267c478bd9Sstevel@tonic-gate  * Pack the whole pool configuration in the specified buffer.
8277c478bd9Sstevel@tonic-gate  */
8287c478bd9Sstevel@tonic-gate int
pool_pack_conf(void * kbuf,size_t kbufsz,size_t * asize)8297c478bd9Sstevel@tonic-gate pool_pack_conf(void *kbuf, size_t kbufsz, size_t *asize)
8307c478bd9Sstevel@tonic-gate {
8317c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
8327c478bd9Sstevel@tonic-gate 	size_t ksize;
8337c478bd9Sstevel@tonic-gate 	int ret = 0;
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate 	eo_system = pool_system_pack();		/* 1. pack system */
8387c478bd9Sstevel@tonic-gate 	(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
8397c478bd9Sstevel@tonic-gate 	(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
8407c478bd9Sstevel@tonic-gate 	ksize = ea_pack_object(eo_system, NULL, 0);
8417c478bd9Sstevel@tonic-gate 	if (kbuf == NULL || kbufsz == 0)
8427c478bd9Sstevel@tonic-gate 		*asize = ksize;
8437c478bd9Sstevel@tonic-gate 	else if (ksize > kbufsz)
8447c478bd9Sstevel@tonic-gate 		ret = ENOMEM;
8457c478bd9Sstevel@tonic-gate 	else
8467c478bd9Sstevel@tonic-gate 		*asize = ea_pack_object(eo_system, kbuf, kbufsz);
8477c478bd9Sstevel@tonic-gate 	ea_free_object(eo_system, EUP_ALLOC);
8487c478bd9Sstevel@tonic-gate 	return (ret);
8497c478bd9Sstevel@tonic-gate }
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate /*
8527c478bd9Sstevel@tonic-gate  * Start/end the commit transaction.  If commit transaction is currently
8537c478bd9Sstevel@tonic-gate  * in progress, then all POOL_QUERY ioctls will return pools configuration
8547c478bd9Sstevel@tonic-gate  * at the beginning of transaction.
8557c478bd9Sstevel@tonic-gate  */
8567c478bd9Sstevel@tonic-gate int
pool_commit(int state)8577c478bd9Sstevel@tonic-gate pool_commit(int state)
8587c478bd9Sstevel@tonic-gate {
8597c478bd9Sstevel@tonic-gate 	ea_object_t *eo_system;
8607c478bd9Sstevel@tonic-gate 	int ret = 0;
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8637c478bd9Sstevel@tonic-gate 
8647c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
8657c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
8667c478bd9Sstevel@tonic-gate 	switch (state) {
8677c478bd9Sstevel@tonic-gate 	case 1:
8687c478bd9Sstevel@tonic-gate 		/*
8697c478bd9Sstevel@tonic-gate 		 * Beginning commit transation.
8707c478bd9Sstevel@tonic-gate 		 */
8717c478bd9Sstevel@tonic-gate 		if (pool_buf != NULL)		/* transaction in progress */
8727c478bd9Sstevel@tonic-gate 			return (EBUSY);
8737c478bd9Sstevel@tonic-gate 		eo_system = pool_system_pack();		/* 1. pack system */
8747c478bd9Sstevel@tonic-gate 		(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
8757c478bd9Sstevel@tonic-gate 		(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
8767c478bd9Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, NULL, 0);
8777c478bd9Sstevel@tonic-gate 		pool_buf = kmem_alloc(pool_bufsz, KM_SLEEP);
8787c478bd9Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, pool_buf, pool_bufsz);
8797c478bd9Sstevel@tonic-gate 		ea_free_object(eo_system, EUP_ALLOC);
8807c478bd9Sstevel@tonic-gate 		break;
8817c478bd9Sstevel@tonic-gate 	case 0:
8827c478bd9Sstevel@tonic-gate 		/*
8837c478bd9Sstevel@tonic-gate 		 * Finishing commit transaction.
8847c478bd9Sstevel@tonic-gate 		 */
8857c478bd9Sstevel@tonic-gate 		if (pool_buf != NULL) {
8867c478bd9Sstevel@tonic-gate 			kmem_free(pool_buf, pool_bufsz);
8877c478bd9Sstevel@tonic-gate 			pool_buf = NULL;
8887c478bd9Sstevel@tonic-gate 			pool_bufsz = 0;
8897c478bd9Sstevel@tonic-gate 		}
8907c478bd9Sstevel@tonic-gate 		break;
8917c478bd9Sstevel@tonic-gate 	default:
8927c478bd9Sstevel@tonic-gate 		ret = EINVAL;
8937c478bd9Sstevel@tonic-gate 	}
8947c478bd9Sstevel@tonic-gate 	return (ret);
8957c478bd9Sstevel@tonic-gate }
8967c478bd9Sstevel@tonic-gate 
8977c478bd9Sstevel@tonic-gate /*
8987c478bd9Sstevel@tonic-gate  * Check is the specified property is special
8997c478bd9Sstevel@tonic-gate  */
9007c478bd9Sstevel@tonic-gate static pool_property_t *
pool_property_find(char * name,pool_property_t * list)9017c478bd9Sstevel@tonic-gate pool_property_find(char *name, pool_property_t *list)
9027c478bd9Sstevel@tonic-gate {
9037c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	for (prop = list; prop->pp_name != NULL; prop++)
9067c478bd9Sstevel@tonic-gate 		if (strcmp(prop->pp_name, name) == 0)
9077c478bd9Sstevel@tonic-gate 			return (prop);
9087c478bd9Sstevel@tonic-gate 	return (NULL);
9097c478bd9Sstevel@tonic-gate }
9107c478bd9Sstevel@tonic-gate 
9117c478bd9Sstevel@tonic-gate static pool_property_t pool_prop_sys[] = {
9127c478bd9Sstevel@tonic-gate 	{ "system.name",		DATA_TYPE_STRING,	PP_RDWR },
9137c478bd9Sstevel@tonic-gate 	{ "system.comment",		DATA_TYPE_STRING,	PP_RDWR },
9147c478bd9Sstevel@tonic-gate 	{ "system.version",		DATA_TYPE_UINT64,	PP_READ },
9157c478bd9Sstevel@tonic-gate 	{ "system.bind-default",	DATA_TYPE_BYTE,		PP_RDWR },
9167c478bd9Sstevel@tonic-gate 	{ "system.allocate-method",	DATA_TYPE_STRING,
9177c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9187c478bd9Sstevel@tonic-gate 	{ "system.poold.log-level",	DATA_TYPE_STRING,
9197c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9207c478bd9Sstevel@tonic-gate 	{ "system.poold.log-location",	DATA_TYPE_STRING,
9217c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9227c478bd9Sstevel@tonic-gate 	{ "system.poold.monitor-interval",	DATA_TYPE_UINT64,
9237c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9247c478bd9Sstevel@tonic-gate 	{ "system.poold.history-file",	DATA_TYPE_STRING,
9257c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9267c478bd9Sstevel@tonic-gate 	{ "system.poold.objectives",	DATA_TYPE_STRING,
9277c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9287c478bd9Sstevel@tonic-gate 	{ NULL,				0,			0 }
9297c478bd9Sstevel@tonic-gate };
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate static pool_property_t pool_prop_pool[] = {
9327c478bd9Sstevel@tonic-gate 	{ "pool.sys_id",		DATA_TYPE_UINT64,	PP_READ },
9337c478bd9Sstevel@tonic-gate 	{ "pool.name",			DATA_TYPE_STRING,	PP_RDWR },
9347c478bd9Sstevel@tonic-gate 	{ "pool.default",		DATA_TYPE_BYTE,		PP_READ },
9357c478bd9Sstevel@tonic-gate 	{ "pool.active",		DATA_TYPE_BYTE,		PP_RDWR },
9367c478bd9Sstevel@tonic-gate 	{ "pool.importance",		DATA_TYPE_INT64,	PP_RDWR },
9377c478bd9Sstevel@tonic-gate 	{ "pool.comment",		DATA_TYPE_STRING,	PP_RDWR },
9387c478bd9Sstevel@tonic-gate 	{ "pool.scheduler",		DATA_TYPE_STRING,
9397c478bd9Sstevel@tonic-gate 	    PP_RDWR | PP_OPTIONAL },
9407c478bd9Sstevel@tonic-gate 	{ NULL,				0,			0 }
9417c478bd9Sstevel@tonic-gate };
9427c478bd9Sstevel@tonic-gate 
9437c478bd9Sstevel@tonic-gate /*
9447c478bd9Sstevel@tonic-gate  * Common routine to put new property on the specified list
9457c478bd9Sstevel@tonic-gate  */
9467c478bd9Sstevel@tonic-gate int
pool_propput_common(nvlist_t * nvlist,nvpair_t * pair,pool_property_t * props)9477c478bd9Sstevel@tonic-gate pool_propput_common(nvlist_t *nvlist, nvpair_t *pair, pool_property_t *props)
9487c478bd9Sstevel@tonic-gate {
9497c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
9507c478bd9Sstevel@tonic-gate 
9517c478bd9Sstevel@tonic-gate 	if ((prop = pool_property_find(nvpair_name(pair), props)) != NULL) {
9527c478bd9Sstevel@tonic-gate 		/*
9537c478bd9Sstevel@tonic-gate 		 * No read-only properties or properties with bad types
9547c478bd9Sstevel@tonic-gate 		 */
9557c478bd9Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_WRITE) ||
9567c478bd9Sstevel@tonic-gate 		    prop->pp_type != nvpair_type(pair))
9577c478bd9Sstevel@tonic-gate 			return (EINVAL);
9587c478bd9Sstevel@tonic-gate 	}
9597c478bd9Sstevel@tonic-gate 	return (nvlist_add_nvpair(nvlist, pair));
9607c478bd9Sstevel@tonic-gate }
9617c478bd9Sstevel@tonic-gate 
9627c478bd9Sstevel@tonic-gate /*
9637c478bd9Sstevel@tonic-gate  * Common routine to remove property from the given list
9647c478bd9Sstevel@tonic-gate  */
9657c478bd9Sstevel@tonic-gate int
pool_proprm_common(nvlist_t * nvlist,char * name,pool_property_t * props)9667c478bd9Sstevel@tonic-gate pool_proprm_common(nvlist_t *nvlist, char *name, pool_property_t *props)
9677c478bd9Sstevel@tonic-gate {
9687c478bd9Sstevel@tonic-gate 	pool_property_t *prop;
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 	if ((prop = pool_property_find(name, props)) != NULL) {
9717c478bd9Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_OPTIONAL))
9727c478bd9Sstevel@tonic-gate 			return (EINVAL);
9737c478bd9Sstevel@tonic-gate 	}
9747c478bd9Sstevel@tonic-gate 	return (nvlist_remove_all(nvlist, name));
9757c478bd9Sstevel@tonic-gate }
9767c478bd9Sstevel@tonic-gate 
9777c478bd9Sstevel@tonic-gate static int
pool_system_propput(nvpair_t * pair)9787c478bd9Sstevel@tonic-gate pool_system_propput(nvpair_t *pair)
9797c478bd9Sstevel@tonic-gate {
9807c478bd9Sstevel@tonic-gate 	int ret;
9817c478bd9Sstevel@tonic-gate 
9827c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9837c478bd9Sstevel@tonic-gate 	ret = pool_propput_common(pool_sys_prop, pair, pool_prop_sys);
9847c478bd9Sstevel@tonic-gate 	if (ret == 0)
9857c478bd9Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
9867c478bd9Sstevel@tonic-gate 	return (ret);
9877c478bd9Sstevel@tonic-gate }
9887c478bd9Sstevel@tonic-gate 
9897c478bd9Sstevel@tonic-gate static int
pool_system_proprm(char * name)9907c478bd9Sstevel@tonic-gate pool_system_proprm(char *name)
9917c478bd9Sstevel@tonic-gate {
9927c478bd9Sstevel@tonic-gate 	int ret;
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9957c478bd9Sstevel@tonic-gate 	ret = pool_proprm_common(pool_sys_prop, name, pool_prop_sys);
9967c478bd9Sstevel@tonic-gate 	if (ret == 0)
9977c478bd9Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
9987c478bd9Sstevel@tonic-gate 	return (ret);
9997c478bd9Sstevel@tonic-gate }
10007c478bd9Sstevel@tonic-gate 
10017c478bd9Sstevel@tonic-gate static int
pool_pool_propput(poolid_t poolid,nvpair_t * pair)10027c478bd9Sstevel@tonic-gate pool_pool_propput(poolid_t poolid, nvpair_t *pair)
10037c478bd9Sstevel@tonic-gate {
10047c478bd9Sstevel@tonic-gate 	pool_t *pool;
10057c478bd9Sstevel@tonic-gate 	int ret;
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10087c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
10097c478bd9Sstevel@tonic-gate 		return (ESRCH);
10107c478bd9Sstevel@tonic-gate 	ret = pool_propput_common(pool->pool_props, pair, pool_prop_pool);
10117c478bd9Sstevel@tonic-gate 	if (ret == 0)
10127c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
10137c478bd9Sstevel@tonic-gate 	return (ret);
10147c478bd9Sstevel@tonic-gate }
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate static int
pool_pool_proprm(poolid_t poolid,char * name)10177c478bd9Sstevel@tonic-gate pool_pool_proprm(poolid_t poolid, char *name)
10187c478bd9Sstevel@tonic-gate {
10197c478bd9Sstevel@tonic-gate 	int ret;
10207c478bd9Sstevel@tonic-gate 	pool_t *pool;
10217c478bd9Sstevel@tonic-gate 
10227c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10237c478bd9Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
10247c478bd9Sstevel@tonic-gate 		return (ESRCH);
10257c478bd9Sstevel@tonic-gate 	ret = pool_proprm_common(pool->pool_props, name, pool_prop_pool);
10267c478bd9Sstevel@tonic-gate 	if (ret == 0)
10277c478bd9Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
10287c478bd9Sstevel@tonic-gate 	return (ret);
10297c478bd9Sstevel@tonic-gate }
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate int
pool_propput(int class,int subclass,id_t id,nvpair_t * pair)10327c478bd9Sstevel@tonic-gate pool_propput(int class, int subclass, id_t id, nvpair_t *pair)
10337c478bd9Sstevel@tonic-gate {
10347c478bd9Sstevel@tonic-gate 	int ret;
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10377c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
10387c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
10397c478bd9Sstevel@tonic-gate 	switch (class) {
10407c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
10417c478bd9Sstevel@tonic-gate 		ret = pool_system_propput(pair);
10427c478bd9Sstevel@tonic-gate 		break;
10437c478bd9Sstevel@tonic-gate 	case PEC_POOL:
10447c478bd9Sstevel@tonic-gate 		ret = pool_pool_propput((poolid_t)id, pair);
10457c478bd9Sstevel@tonic-gate 		break;
10467c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
10477c478bd9Sstevel@tonic-gate 		switch (subclass) {
10487c478bd9Sstevel@tonic-gate 		case PREC_PSET:
10497c478bd9Sstevel@tonic-gate 			ret = pool_pset_propput((psetid_t)id, pair);
10507c478bd9Sstevel@tonic-gate 			break;
10517c478bd9Sstevel@tonic-gate 		default:
10527c478bd9Sstevel@tonic-gate 			ret = EINVAL;
10537c478bd9Sstevel@tonic-gate 		}
10547c478bd9Sstevel@tonic-gate 		break;
10557c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
10567c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
10577c478bd9Sstevel@tonic-gate 		break;
10587c478bd9Sstevel@tonic-gate 	case PEC_COMP:
10597c478bd9Sstevel@tonic-gate 		switch (subclass) {
10607c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
10617c478bd9Sstevel@tonic-gate 			ret = pool_cpu_propput((processorid_t)id, pair);
10627c478bd9Sstevel@tonic-gate 			break;
10637c478bd9Sstevel@tonic-gate 		default:
10647c478bd9Sstevel@tonic-gate 			ret = EINVAL;
10657c478bd9Sstevel@tonic-gate 		}
10667c478bd9Sstevel@tonic-gate 		break;
10677c478bd9Sstevel@tonic-gate 	default:
10687c478bd9Sstevel@tonic-gate 		ret = EINVAL;
10697c478bd9Sstevel@tonic-gate 	}
10707c478bd9Sstevel@tonic-gate 	return (ret);
10717c478bd9Sstevel@tonic-gate }
10727c478bd9Sstevel@tonic-gate 
10737c478bd9Sstevel@tonic-gate int
pool_proprm(int class,int subclass,id_t id,char * name)10747c478bd9Sstevel@tonic-gate pool_proprm(int class, int subclass, id_t id, char *name)
10757c478bd9Sstevel@tonic-gate {
10767c478bd9Sstevel@tonic-gate 	int ret;
10777c478bd9Sstevel@tonic-gate 
10787c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10797c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
10807c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
10817c478bd9Sstevel@tonic-gate 	switch (class) {
10827c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
10837c478bd9Sstevel@tonic-gate 		ret = pool_system_proprm(name);
10847c478bd9Sstevel@tonic-gate 		break;
10857c478bd9Sstevel@tonic-gate 	case PEC_POOL:
10867c478bd9Sstevel@tonic-gate 		ret = pool_pool_proprm((poolid_t)id, name);
10877c478bd9Sstevel@tonic-gate 		break;
10887c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
10897c478bd9Sstevel@tonic-gate 		switch (subclass) {
10907c478bd9Sstevel@tonic-gate 		case PREC_PSET:
10917c478bd9Sstevel@tonic-gate 			ret = pool_pset_proprm((psetid_t)id, name);
10927c478bd9Sstevel@tonic-gate 			break;
10937c478bd9Sstevel@tonic-gate 		default:
10947c478bd9Sstevel@tonic-gate 			ret = EINVAL;
10957c478bd9Sstevel@tonic-gate 		}
10967c478bd9Sstevel@tonic-gate 		break;
10977c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
10987c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
10997c478bd9Sstevel@tonic-gate 		break;
11007c478bd9Sstevel@tonic-gate 	case PEC_COMP:
11017c478bd9Sstevel@tonic-gate 		switch (subclass) {
11027c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
11037c478bd9Sstevel@tonic-gate 			ret = pool_cpu_proprm((processorid_t)id, name);
11047c478bd9Sstevel@tonic-gate 			break;
11057c478bd9Sstevel@tonic-gate 		default:
11067c478bd9Sstevel@tonic-gate 			ret = EINVAL;
11077c478bd9Sstevel@tonic-gate 		}
11087c478bd9Sstevel@tonic-gate 		break;
11097c478bd9Sstevel@tonic-gate 	default:
11107c478bd9Sstevel@tonic-gate 		ret = EINVAL;
11117c478bd9Sstevel@tonic-gate 	}
11127c478bd9Sstevel@tonic-gate 	return (ret);
11137c478bd9Sstevel@tonic-gate }
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate int
pool_propget(char * name,int class,int subclass,id_t id,nvlist_t ** nvlp)11167c478bd9Sstevel@tonic-gate pool_propget(char *name, int class, int subclass, id_t id, nvlist_t **nvlp)
11177c478bd9Sstevel@tonic-gate {
11187c478bd9Sstevel@tonic-gate 	int ret;
11197c478bd9Sstevel@tonic-gate 	nvlist_t *nvl;
11207c478bd9Sstevel@tonic-gate 
11217c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11227c478bd9Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
11237c478bd9Sstevel@tonic-gate 		return (ENOTACTIVE);
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate 	switch (class) {
11287c478bd9Sstevel@tonic-gate 	case PEC_SYSTEM:
11297c478bd9Sstevel@tonic-gate 	case PEC_POOL:
11307c478bd9Sstevel@tonic-gate 		ret = EINVAL;
11317c478bd9Sstevel@tonic-gate 		break;
11327c478bd9Sstevel@tonic-gate 	case PEC_RES_COMP:
11337c478bd9Sstevel@tonic-gate 		switch (subclass) {
11347c478bd9Sstevel@tonic-gate 		case PREC_PSET:
11357c478bd9Sstevel@tonic-gate 			ret = pool_pset_propget((psetid_t)id, name, nvl);
11367c478bd9Sstevel@tonic-gate 			break;
11377c478bd9Sstevel@tonic-gate 		default:
11387c478bd9Sstevel@tonic-gate 			ret = EINVAL;
11397c478bd9Sstevel@tonic-gate 		}
11407c478bd9Sstevel@tonic-gate 		break;
11417c478bd9Sstevel@tonic-gate 	case PEC_RES_AGG:
11427c478bd9Sstevel@tonic-gate 		ret = ENOTSUP;
11437c478bd9Sstevel@tonic-gate 		break;
11447c478bd9Sstevel@tonic-gate 	case PEC_COMP:
11457c478bd9Sstevel@tonic-gate 		switch (subclass) {
11467c478bd9Sstevel@tonic-gate 		case PCEC_CPU:
11477c478bd9Sstevel@tonic-gate 			ret = pool_cpu_propget((processorid_t)id, name, nvl);
11487c478bd9Sstevel@tonic-gate 			break;
11497c478bd9Sstevel@tonic-gate 		default:
11507c478bd9Sstevel@tonic-gate 			ret = EINVAL;
11517c478bd9Sstevel@tonic-gate 		}
11527c478bd9Sstevel@tonic-gate 		break;
11537c478bd9Sstevel@tonic-gate 	default:
11547c478bd9Sstevel@tonic-gate 		ret = EINVAL;
11557c478bd9Sstevel@tonic-gate 	}
11567c478bd9Sstevel@tonic-gate 	if (ret == 0)
11577c478bd9Sstevel@tonic-gate 		*nvlp = nvl;
11587c478bd9Sstevel@tonic-gate 	else
11597c478bd9Sstevel@tonic-gate 		nvlist_free(nvl);
11607c478bd9Sstevel@tonic-gate 	return (ret);
11617c478bd9Sstevel@tonic-gate }
11627c478bd9Sstevel@tonic-gate 
11637c478bd9Sstevel@tonic-gate /*
11647c478bd9Sstevel@tonic-gate  * pool_bind_wake and pool_bind_wakeall are helper functions to undo PBWAITs
11657c478bd9Sstevel@tonic-gate  * in case of failure in pool_do_bind().
11667c478bd9Sstevel@tonic-gate  */
11677c478bd9Sstevel@tonic-gate static void
pool_bind_wake(proc_t * p)11687c478bd9Sstevel@tonic-gate pool_bind_wake(proc_t *p)
11697c478bd9Sstevel@tonic-gate {
11707c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11717c478bd9Sstevel@tonic-gate 
11727c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11737c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
11747c478bd9Sstevel@tonic-gate 	if (p->p_poolcnt > 0) {
11757c478bd9Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
11767c478bd9Sstevel@tonic-gate 		pool_barrier_count -= p->p_poolcnt;
11777c478bd9Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
11787c478bd9Sstevel@tonic-gate 	}
11797c478bd9Sstevel@tonic-gate 	p->p_poolflag &= ~PBWAIT;
11807c478bd9Sstevel@tonic-gate 	cv_signal(&p->p_poolcv);
11817c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11827c478bd9Sstevel@tonic-gate }
11837c478bd9Sstevel@tonic-gate 
11847c478bd9Sstevel@tonic-gate static void
pool_bind_wakeall(proc_t ** procs)11857c478bd9Sstevel@tonic-gate pool_bind_wakeall(proc_t **procs)
11867c478bd9Sstevel@tonic-gate {
11877c478bd9Sstevel@tonic-gate 	proc_t *p, **pp;
11887c478bd9Sstevel@tonic-gate 
11897c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11907c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++)
11917c478bd9Sstevel@tonic-gate 		pool_bind_wake(p);
11927c478bd9Sstevel@tonic-gate }
11937c478bd9Sstevel@tonic-gate 
11947c478bd9Sstevel@tonic-gate /*
11957c478bd9Sstevel@tonic-gate  * Return the scheduling class for this pool, or
11967c478bd9Sstevel@tonic-gate  * 	POOL_CLASS_UNSET if not set
11977c478bd9Sstevel@tonic-gate  * 	POOL_CLASS_INVAL if set to an invalid class ID.
11987c478bd9Sstevel@tonic-gate  */
11997c478bd9Sstevel@tonic-gate id_t
pool_get_class(pool_t * pool)12007c478bd9Sstevel@tonic-gate pool_get_class(pool_t *pool)
12017c478bd9Sstevel@tonic-gate {
12027c478bd9Sstevel@tonic-gate 	char *name;
12037c478bd9Sstevel@tonic-gate 	id_t cid;
12047c478bd9Sstevel@tonic-gate 
12057c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate 	if (nvlist_lookup_string(pool->pool_props, "pool.scheduler",
12087c478bd9Sstevel@tonic-gate 	    &name) == 0) {
12097c478bd9Sstevel@tonic-gate 		if (getcidbyname(name, &cid) == 0)
12107c478bd9Sstevel@tonic-gate 			return (cid);
12117c478bd9Sstevel@tonic-gate 		else
12127c478bd9Sstevel@tonic-gate 			return (POOL_CLASS_INVAL);
12137c478bd9Sstevel@tonic-gate 	}
12147c478bd9Sstevel@tonic-gate 	return (POOL_CLASS_UNSET);
12157c478bd9Sstevel@tonic-gate }
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate /*
12187c478bd9Sstevel@tonic-gate  * Move process to the new scheduling class.
12197c478bd9Sstevel@tonic-gate  */
12207c478bd9Sstevel@tonic-gate static void
pool_change_class(proc_t * p,id_t cid)12217c478bd9Sstevel@tonic-gate pool_change_class(proc_t *p, id_t cid)
12227c478bd9Sstevel@tonic-gate {
12237c478bd9Sstevel@tonic-gate 	kthread_t *t;
12247c478bd9Sstevel@tonic-gate 	void *cldata;
12257c478bd9Sstevel@tonic-gate 	id_t oldcid;
12267c478bd9Sstevel@tonic-gate 	void **bufs;
12277c478bd9Sstevel@tonic-gate 	void **buf;
12287c478bd9Sstevel@tonic-gate 	int nlwp;
12297c478bd9Sstevel@tonic-gate 	int ret;
12307c478bd9Sstevel@tonic-gate 	int i;
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	/*
12337c478bd9Sstevel@tonic-gate 	 * Do not move kernel processes (such as zsched).
12347c478bd9Sstevel@tonic-gate 	 */
12357c478bd9Sstevel@tonic-gate 	if (p->p_flag & SSYS)
12367c478bd9Sstevel@tonic-gate 		return;
12377c478bd9Sstevel@tonic-gate 	/*
12387c478bd9Sstevel@tonic-gate 	 * This process is in the pool barrier, so it can't possibly be
12397c478bd9Sstevel@tonic-gate 	 * adding new threads and we can use p_lwpcnt + p_zombcnt + 1
12407c478bd9Sstevel@tonic-gate 	 * (for possible agent LWP which doesn't use pool barrier) as
12417c478bd9Sstevel@tonic-gate 	 * our upper bound.
12427c478bd9Sstevel@tonic-gate 	 */
12437c478bd9Sstevel@tonic-gate 	nlwp = p->p_lwpcnt + p->p_zombcnt + 1;
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 	/*
12467c478bd9Sstevel@tonic-gate 	 * Pre-allocate scheduling class specific buffers before
12477c478bd9Sstevel@tonic-gate 	 * grabbing p_lock.
12487c478bd9Sstevel@tonic-gate 	 */
12497c478bd9Sstevel@tonic-gate 	bufs = kmem_zalloc(nlwp * sizeof (void *), KM_SLEEP);
12507c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
12517c478bd9Sstevel@tonic-gate 		ret = CL_ALLOC(buf, cid, KM_SLEEP);
12527c478bd9Sstevel@tonic-gate 		ASSERT(ret == 0);
12537c478bd9Sstevel@tonic-gate 	}
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 	/*
12567c478bd9Sstevel@tonic-gate 	 * Move threads one by one to the new scheduling class.
12577c478bd9Sstevel@tonic-gate 	 * This never fails because we have all the right
12587c478bd9Sstevel@tonic-gate 	 * privileges here.
12597c478bd9Sstevel@tonic-gate 	 */
12607c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
12617c478bd9Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
12627c478bd9Sstevel@tonic-gate 	buf = bufs;
12637c478bd9Sstevel@tonic-gate 	t = p->p_tlist;
12647c478bd9Sstevel@tonic-gate 	ASSERT(t != NULL);
12657c478bd9Sstevel@tonic-gate 	do {
12667c478bd9Sstevel@tonic-gate 		if (t->t_cid != cid) {
12677c478bd9Sstevel@tonic-gate 			oldcid = t->t_cid;
12687c478bd9Sstevel@tonic-gate 			cldata = t->t_cldata;
12697c478bd9Sstevel@tonic-gate 			ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf);
12707c478bd9Sstevel@tonic-gate 			ASSERT(ret == 0);
12717c478bd9Sstevel@tonic-gate 			CL_EXITCLASS(oldcid, cldata);
1272d4204c85Sraf 			schedctl_set_cidpri(t);
12737c478bd9Sstevel@tonic-gate 			*buf++ = NULL;
12747c478bd9Sstevel@tonic-gate 		}
12757c478bd9Sstevel@tonic-gate 	} while ((t = t->t_forw) != p->p_tlist);
12767c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
12777c478bd9Sstevel@tonic-gate 	/*
12787c478bd9Sstevel@tonic-gate 	 * Free unused scheduling class specific buffers.
12797c478bd9Sstevel@tonic-gate 	 */
12807c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
12817c478bd9Sstevel@tonic-gate 		if (*buf != NULL) {
12827c478bd9Sstevel@tonic-gate 			CL_FREE(cid, *buf);
12837c478bd9Sstevel@tonic-gate 			*buf = NULL;
12847c478bd9Sstevel@tonic-gate 		}
12857c478bd9Sstevel@tonic-gate 	}
12867c478bd9Sstevel@tonic-gate 	kmem_free(bufs, nlwp * sizeof (void *));
12877c478bd9Sstevel@tonic-gate }
12887c478bd9Sstevel@tonic-gate 
12890dc2366fSVenugopal Iyer void
pool_get_name(pool_t * pool,char ** name)12900dc2366fSVenugopal Iyer pool_get_name(pool_t *pool, char **name)
12910dc2366fSVenugopal Iyer {
12920dc2366fSVenugopal Iyer 	ASSERT(pool_lock_held());
12930dc2366fSVenugopal Iyer 
12940dc2366fSVenugopal Iyer 	(void) nvlist_lookup_string(pool->pool_props, "pool.name", name);
12950dc2366fSVenugopal Iyer 
12960dc2366fSVenugopal Iyer 	ASSERT(strlen(*name) != 0);
12970dc2366fSVenugopal Iyer }
12980dc2366fSVenugopal Iyer 
12990dc2366fSVenugopal Iyer 
13007c478bd9Sstevel@tonic-gate /*
13017c478bd9Sstevel@tonic-gate  * The meat of the bind operation.  The steps in pool_do_bind are:
13027c478bd9Sstevel@tonic-gate  *
13037c478bd9Sstevel@tonic-gate  * 1) Set PBWAIT in the p_poolflag of any process of interest, and add all
13047c478bd9Sstevel@tonic-gate  *    such processes to an array.  For any interesting process that has
13057c478bd9Sstevel@tonic-gate  *    threads inside the pool barrier set, increment a counter by the
13067c478bd9Sstevel@tonic-gate  *    count of such threads.  Once PBWAIT is set on a process, that process
13077c478bd9Sstevel@tonic-gate  *    will not disappear.
13087c478bd9Sstevel@tonic-gate  *
13097c478bd9Sstevel@tonic-gate  * 2) Wait for the counter from step 2 to drop to zero.  Any process which
13107c478bd9Sstevel@tonic-gate  *    calls pool_barrier_exit() and notices that PBWAIT has been set on it
13117c478bd9Sstevel@tonic-gate  *    will decrement that counter before going to sleep, and the process
13127c478bd9Sstevel@tonic-gate  *    calling pool_barrier_exit() which does the final decrement will wake us.
13137c478bd9Sstevel@tonic-gate  *
13147c478bd9Sstevel@tonic-gate  * 3) For each interesting process, perform a calculation on it to see if
13157c478bd9Sstevel@tonic-gate  *    the bind will actually succeed.  This uses the following three
13167c478bd9Sstevel@tonic-gate  *    resource-set-specific functions:
13177c478bd9Sstevel@tonic-gate  *
13187c478bd9Sstevel@tonic-gate  *    - int set_bind_start(procs, pool)
13197c478bd9Sstevel@tonic-gate  *
13207c478bd9Sstevel@tonic-gate  *      Determine whether the given array of processes can be bound to the
13217c478bd9Sstevel@tonic-gate  *      resource set associated with the given pool.  If it can, take and hold
13227c478bd9Sstevel@tonic-gate  *      any locks necessary to ensure that the operation will succeed, and
13237c478bd9Sstevel@tonic-gate  *      make any necessary reservations in the target resource set.  If it
13247c478bd9Sstevel@tonic-gate  *      can't, return failure with no reservations made and no new locks held.
13257c478bd9Sstevel@tonic-gate  *
13267c478bd9Sstevel@tonic-gate  *    - void set_bind_abort(procs, pool)
13277c478bd9Sstevel@tonic-gate  *
13287c478bd9Sstevel@tonic-gate  *      set_bind_start() has completed successfully, but another resource set's
13297c478bd9Sstevel@tonic-gate  *      set_bind_start() has failed, and we haven't begun the bind yet.  Undo
13307c478bd9Sstevel@tonic-gate  *      any reservations made and drop any locks acquired by our
13317c478bd9Sstevel@tonic-gate  *      set_bind_start().
13327c478bd9Sstevel@tonic-gate  *
13337c478bd9Sstevel@tonic-gate  *    - void set_bind_finish(void)
13347c478bd9Sstevel@tonic-gate  *
13357c478bd9Sstevel@tonic-gate  *      The bind has completed successfully.  The processes have been released,
13367c478bd9Sstevel@tonic-gate  *      and the reservation acquired in set_bind_start() has been depleted as
13377c478bd9Sstevel@tonic-gate  *      the processes have finished their bindings.  Drop any locks acquired by
13387c478bd9Sstevel@tonic-gate  *      set_bind_start().
13397c478bd9Sstevel@tonic-gate  *
13407c478bd9Sstevel@tonic-gate  * 4) If we've decided that we can proceed with the bind, iterate through
13417c478bd9Sstevel@tonic-gate  *    the list of interesting processes, grab the necessary locks (which
13427c478bd9Sstevel@tonic-gate  *    may differ per resource set), perform the bind, and ASSERT that it
13437c478bd9Sstevel@tonic-gate  *    succeeds.  Once a process has been rebound, it can be awakened.
13447c478bd9Sstevel@tonic-gate  *
13457c478bd9Sstevel@tonic-gate  * The operations from step 4 must be kept in sync with anything which might
13467c478bd9Sstevel@tonic-gate  * cause the bind operations (e.g., cpupart_bind_thread()) to fail, and
13477c478bd9Sstevel@tonic-gate  * are thus located in the same source files as the associated bind operations.
13487c478bd9Sstevel@tonic-gate  */
13497c478bd9Sstevel@tonic-gate int
pool_do_bind(pool_t * pool,idtype_t idtype,id_t id,int flags)13507c478bd9Sstevel@tonic-gate pool_do_bind(pool_t *pool, idtype_t idtype, id_t id, int flags)
13517c478bd9Sstevel@tonic-gate {
13527c478bd9Sstevel@tonic-gate 	extern uint_t nproc;
13537c478bd9Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(curthread);
13547c478bd9Sstevel@tonic-gate 	proc_t **pp, **procs;
13557c478bd9Sstevel@tonic-gate 	proc_t *prstart;
13567c478bd9Sstevel@tonic-gate 	int procs_count = 0;
13577c478bd9Sstevel@tonic-gate 	kproject_t *kpj;
13587c478bd9Sstevel@tonic-gate 	procset_t set;
13597c478bd9Sstevel@tonic-gate 	zone_t *zone;
13607c478bd9Sstevel@tonic-gate 	int procs_size;
13617c478bd9Sstevel@tonic-gate 	int rv = 0;
13627c478bd9Sstevel@tonic-gate 	proc_t *p;
13637c478bd9Sstevel@tonic-gate 	id_t cid = -1;
13647c478bd9Sstevel@tonic-gate 
13657c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
13667c478bd9Sstevel@tonic-gate 
13677c478bd9Sstevel@tonic-gate 	if ((cid = pool_get_class(pool)) == POOL_CLASS_INVAL)
13687c478bd9Sstevel@tonic-gate 		return (EINVAL);
13697c478bd9Sstevel@tonic-gate 
13707c478bd9Sstevel@tonic-gate 	if (idtype == P_ZONEID) {
13717c478bd9Sstevel@tonic-gate 		zone = zone_find_by_id(id);
13727c478bd9Sstevel@tonic-gate 		if (zone == NULL)
13737c478bd9Sstevel@tonic-gate 			return (ESRCH);
13747c478bd9Sstevel@tonic-gate 		if (zone_status_get(zone) > ZONE_IS_RUNNING) {
13757c478bd9Sstevel@tonic-gate 			zone_rele(zone);
13767c478bd9Sstevel@tonic-gate 			return (EBUSY);
13777c478bd9Sstevel@tonic-gate 		}
13787c478bd9Sstevel@tonic-gate 	}
13797c478bd9Sstevel@tonic-gate 
13807c478bd9Sstevel@tonic-gate 	if (idtype == P_PROJID) {
13810209230bSgjelinek 		kpj = project_hold_by_id(id, global_zone, PROJECT_HOLD_FIND);
13827c478bd9Sstevel@tonic-gate 		if (kpj == NULL)
13837c478bd9Sstevel@tonic-gate 			return (ESRCH);
13847c478bd9Sstevel@tonic-gate 		mutex_enter(&kpj->kpj_poolbind);
13857c478bd9Sstevel@tonic-gate 	}
13867c478bd9Sstevel@tonic-gate 
13877c478bd9Sstevel@tonic-gate 	if (idtype == P_PID) {
13887c478bd9Sstevel@tonic-gate 		/*
13897c478bd9Sstevel@tonic-gate 		 * Fast-path for a single process case.
13907c478bd9Sstevel@tonic-gate 		 */
13917c478bd9Sstevel@tonic-gate 		procs_size = 2;	/* procs is NULL-terminated */
13927c478bd9Sstevel@tonic-gate 		procs = kmem_zalloc(procs_size * sizeof (proc_t *), KM_SLEEP);
13937c478bd9Sstevel@tonic-gate 		mutex_enter(&pidlock);
13947c478bd9Sstevel@tonic-gate 	} else {
13957c478bd9Sstevel@tonic-gate 		/*
13967c478bd9Sstevel@tonic-gate 		 * We will need enough slots for proc_t pointers for as many as
13977c478bd9Sstevel@tonic-gate 		 * twice the number of currently running processes (assuming
13987c478bd9Sstevel@tonic-gate 		 * that each one could be in fork() creating a new child).
13997c478bd9Sstevel@tonic-gate 		 */
14007c478bd9Sstevel@tonic-gate 		for (;;) {
14017c478bd9Sstevel@tonic-gate 			procs_size = nproc * 2;
14027c478bd9Sstevel@tonic-gate 			procs = kmem_zalloc(procs_size * sizeof (proc_t *),
14037c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
14047c478bd9Sstevel@tonic-gate 			mutex_enter(&pidlock);
14057c478bd9Sstevel@tonic-gate 
14067c478bd9Sstevel@tonic-gate 			if (nproc * 2 <= procs_size)
14077c478bd9Sstevel@tonic-gate 				break;
14087c478bd9Sstevel@tonic-gate 			/*
14097c478bd9Sstevel@tonic-gate 			 * If nproc has changed, try again.
14107c478bd9Sstevel@tonic-gate 			 */
14117c478bd9Sstevel@tonic-gate 			mutex_exit(&pidlock);
14127c478bd9Sstevel@tonic-gate 			kmem_free(procs, procs_size * sizeof (proc_t *));
14137c478bd9Sstevel@tonic-gate 		}
14147c478bd9Sstevel@tonic-gate 	}
14157c478bd9Sstevel@tonic-gate 
14167c478bd9Sstevel@tonic-gate 	if (id == P_MYID)
14177c478bd9Sstevel@tonic-gate 		id = getmyid(idtype);
14187c478bd9Sstevel@tonic-gate 	setprocset(&set, POP_AND, idtype, id, P_ALL, 0);
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 	/*
14217c478bd9Sstevel@tonic-gate 	 * Do a first scan, and select target processes.
14227c478bd9Sstevel@tonic-gate 	 */
14237c478bd9Sstevel@tonic-gate 	if (idtype == P_PID)
14247c478bd9Sstevel@tonic-gate 		prstart = prfind(id);
14257c478bd9Sstevel@tonic-gate 	else
14267c478bd9Sstevel@tonic-gate 		prstart = practive;
14277c478bd9Sstevel@tonic-gate 	for (p = prstart, pp = procs; p != NULL; p = p->p_next) {
14287c478bd9Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
14297c478bd9Sstevel@tonic-gate 		/*
14307c478bd9Sstevel@tonic-gate 		 * Skip processes that don't match our (id, idtype) set or
14317c478bd9Sstevel@tonic-gate 		 * on the way of becoming zombies.  Skip kernel processes
14327c478bd9Sstevel@tonic-gate 		 * from the global zone.
14337c478bd9Sstevel@tonic-gate 		 */
14347c478bd9Sstevel@tonic-gate 		if (procinset(p, &set) == 0 ||
14357c478bd9Sstevel@tonic-gate 		    p->p_poolflag & PEXITED ||
14367c478bd9Sstevel@tonic-gate 		    ((p->p_flag & SSYS) && INGLOBALZONE(p))) {
14377c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
14387c478bd9Sstevel@tonic-gate 			continue;
14397c478bd9Sstevel@tonic-gate 		}
14407c478bd9Sstevel@tonic-gate 		if (!INGLOBALZONE(p)) {
14417c478bd9Sstevel@tonic-gate 			switch (idtype) {
14427c478bd9Sstevel@tonic-gate 			case P_PID:
14437c478bd9Sstevel@tonic-gate 			case P_TASKID:
14447c478bd9Sstevel@tonic-gate 				/*
14457c478bd9Sstevel@tonic-gate 				 * Can't bind processes or tasks
14467c478bd9Sstevel@tonic-gate 				 * in local zones to pools.
14477c478bd9Sstevel@tonic-gate 				 */
14487c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
14497c478bd9Sstevel@tonic-gate 				mutex_exit(&pidlock);
14507c478bd9Sstevel@tonic-gate 				pool_bind_wakeall(procs);
14517c478bd9Sstevel@tonic-gate 				rv = EINVAL;
14527c478bd9Sstevel@tonic-gate 				goto out;
14537c478bd9Sstevel@tonic-gate 			case P_PROJID:
14547c478bd9Sstevel@tonic-gate 				/*
14557c478bd9Sstevel@tonic-gate 				 * Only projects in the global
14567c478bd9Sstevel@tonic-gate 				 * zone can be rebound.
14577c478bd9Sstevel@tonic-gate 				 */
14587c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
14597c478bd9Sstevel@tonic-gate 				continue;
14607c478bd9Sstevel@tonic-gate 			case P_POOLID:
14617c478bd9Sstevel@tonic-gate 				/*
14627c478bd9Sstevel@tonic-gate 				 * When rebinding pools, processes can be
14637c478bd9Sstevel@tonic-gate 				 * in different zones.
14647c478bd9Sstevel@tonic-gate 				 */
14657c478bd9Sstevel@tonic-gate 				break;
14667c478bd9Sstevel@tonic-gate 			}
14677c478bd9Sstevel@tonic-gate 		}
14687c478bd9Sstevel@tonic-gate 
14697c478bd9Sstevel@tonic-gate 		p->p_poolflag |= PBWAIT;
14707c478bd9Sstevel@tonic-gate 		/*
14717c478bd9Sstevel@tonic-gate 		 * If some threads in this process are inside the pool
14727c478bd9Sstevel@tonic-gate 		 * barrier, add them to pool_barrier_count, as we have
14737c478bd9Sstevel@tonic-gate 		 * to wait for all of them to exit the barrier.
14747c478bd9Sstevel@tonic-gate 		 */
14757c478bd9Sstevel@tonic-gate 		if (p->p_poolcnt > 0) {
14767c478bd9Sstevel@tonic-gate 			mutex_enter(&pool_barrier_lock);
14777c478bd9Sstevel@tonic-gate 			pool_barrier_count += p->p_poolcnt;
14787c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
14797c478bd9Sstevel@tonic-gate 		}
14807c478bd9Sstevel@tonic-gate 		ASSERT(pp < &procs[procs_size]);
14817c478bd9Sstevel@tonic-gate 		*pp++ = p;
14827c478bd9Sstevel@tonic-gate 		procs_count++;
14837c478bd9Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate 		/*
14867c478bd9Sstevel@tonic-gate 		 * We just found our process, so if we're only rebinding a
14877c478bd9Sstevel@tonic-gate 		 * single process then get out of this loop.
14887c478bd9Sstevel@tonic-gate 		 */
14897c478bd9Sstevel@tonic-gate 		if (idtype == P_PID)
14907c478bd9Sstevel@tonic-gate 			break;
14917c478bd9Sstevel@tonic-gate 	}
14927c478bd9Sstevel@tonic-gate 	*pp = NULL;	/* cap off the end of the array */
14937c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate 	/*
14967c478bd9Sstevel@tonic-gate 	 * Wait for relevant processes to stop before they try to enter the
14977c478bd9Sstevel@tonic-gate 	 * barrier or at the exit from the barrier.  Make sure that we do
14987c478bd9Sstevel@tonic-gate 	 * not get stopped here while we're holding pool_lock.  If we were
14997c478bd9Sstevel@tonic-gate 	 * requested to stop, or got a signal then return EAGAIN to let the
15007c478bd9Sstevel@tonic-gate 	 * library know that it needs to retry.
15017c478bd9Sstevel@tonic-gate 	 */
15027c478bd9Sstevel@tonic-gate 	mutex_enter(&pool_barrier_lock);
15037c478bd9Sstevel@tonic-gate 	lwp->lwp_nostop++;
15047c478bd9Sstevel@tonic-gate 	while (pool_barrier_count > 0) {
15057c478bd9Sstevel@tonic-gate 		(void) cv_wait_sig(&pool_barrier_cv, &pool_barrier_lock);
15067c478bd9Sstevel@tonic-gate 		if (pool_barrier_count > 0) {
15077c478bd9Sstevel@tonic-gate 			/*
15087c478bd9Sstevel@tonic-gate 			 * We either got a signal or were requested to
15097c478bd9Sstevel@tonic-gate 			 * stop by /proc.  Bail out with EAGAIN.  If we were
15107c478bd9Sstevel@tonic-gate 			 * requested to stop, we'll stop in post_syscall()
15117c478bd9Sstevel@tonic-gate 			 * on our way back to userland.
15127c478bd9Sstevel@tonic-gate 			 */
15137c478bd9Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
15147c478bd9Sstevel@tonic-gate 			pool_bind_wakeall(procs);
15157c478bd9Sstevel@tonic-gate 			lwp->lwp_nostop--;
15167c478bd9Sstevel@tonic-gate 			rv = EAGAIN;
15177c478bd9Sstevel@tonic-gate 			goto out;
15187c478bd9Sstevel@tonic-gate 		}
15197c478bd9Sstevel@tonic-gate 	}
15207c478bd9Sstevel@tonic-gate 	lwp->lwp_nostop--;
15217c478bd9Sstevel@tonic-gate 	mutex_exit(&pool_barrier_lock);
15227c478bd9Sstevel@tonic-gate 
1523fec0805bSSurya Prakki 	if (idtype == P_PID) {
1524fec0805bSSurya Prakki 		if ((p = *procs) == NULL)
15257c478bd9Sstevel@tonic-gate 			goto skip;
1526fec0805bSSurya Prakki 		mutex_enter(&p->p_lock);
1527fec0805bSSurya Prakki 		/* Drop the process if it is exiting */
1528fec0805bSSurya Prakki 		if (p->p_poolflag & PEXITED) {
1529fec0805bSSurya Prakki 			mutex_exit(&p->p_lock);
1530fec0805bSSurya Prakki 			pool_bind_wake(p);
1531fec0805bSSurya Prakki 			procs_count--;
1532fec0805bSSurya Prakki 		} else
1533fec0805bSSurya Prakki 			mutex_exit(&p->p_lock);
1534fec0805bSSurya Prakki 		goto skip;
1535fec0805bSSurya Prakki 	}
15367c478bd9Sstevel@tonic-gate 
15377c478bd9Sstevel@tonic-gate 	/*
15387c478bd9Sstevel@tonic-gate 	 * Do another run, and drop processes that were inside the barrier
15397c478bd9Sstevel@tonic-gate 	 * in exit(), but when they have dropped to pool_barrier_exit
15407c478bd9Sstevel@tonic-gate 	 * they have become of no interest to us.  Pick up child processes that
15417c478bd9Sstevel@tonic-gate 	 * were created by fork() but didn't exist during our first scan.
15427c478bd9Sstevel@tonic-gate 	 * Their parents are now stopped at pool_barrier_exit in cfork().
15437c478bd9Sstevel@tonic-gate 	 */
15447c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
15457c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
1546fec0805bSSurya Prakki 		mutex_enter(&p->p_lock);
15477c478bd9Sstevel@tonic-gate 		if (p->p_poolflag & PEXITED) {
15487c478bd9Sstevel@tonic-gate 			ASSERT(p->p_lwpcnt == 0);
1549fec0805bSSurya Prakki 			mutex_exit(&p->p_lock);
15507c478bd9Sstevel@tonic-gate 			pool_bind_wake(p);
15517c478bd9Sstevel@tonic-gate 			/* flip w/last non-NULL slot */
15527c478bd9Sstevel@tonic-gate 			*pp = procs[procs_count - 1];
15537c478bd9Sstevel@tonic-gate 			procs[procs_count - 1] = NULL;
15547c478bd9Sstevel@tonic-gate 			procs_count--;
15557c478bd9Sstevel@tonic-gate 			pp--;			/* try this slot again */
15567c478bd9Sstevel@tonic-gate 			continue;
1557fec0805bSSurya Prakki 		} else
1558fec0805bSSurya Prakki 			mutex_exit(&p->p_lock);
15597c478bd9Sstevel@tonic-gate 		/*
15607c478bd9Sstevel@tonic-gate 		 * Look at the child and check if it should be rebound also.
15617c478bd9Sstevel@tonic-gate 		 * We're holding pidlock, so it is safe to reference p_child.
15627c478bd9Sstevel@tonic-gate 		 */
15637c478bd9Sstevel@tonic-gate 		if ((p = p->p_child) == NULL)
15647c478bd9Sstevel@tonic-gate 			continue;
15657c478bd9Sstevel@tonic-gate 
15667c478bd9Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
156701188c7aSjv227347 
15687c478bd9Sstevel@tonic-gate 		/*
156901188c7aSjv227347 		 * Skip system processes and make sure that the child is in
157001188c7aSjv227347 		 * the same task/project/pool/zone as the parent.
15717c478bd9Sstevel@tonic-gate 		 */
157201188c7aSjv227347 		if ((!INGLOBALZONE(p) && idtype != P_ZONEID &&
157301188c7aSjv227347 		    idtype != P_POOLID) || p->p_flag & SSYS) {
15747c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
15757c478bd9Sstevel@tonic-gate 			continue;
15767c478bd9Sstevel@tonic-gate 		}
15777c478bd9Sstevel@tonic-gate 
15787c478bd9Sstevel@tonic-gate 		/*
15797c478bd9Sstevel@tonic-gate 		 * If the child process has been already created by fork(), has
15807c478bd9Sstevel@tonic-gate 		 * not exited, and has not been added to the list already,
15817c478bd9Sstevel@tonic-gate 		 * then add it now.  We will hit this process again (since we
15827c478bd9Sstevel@tonic-gate 		 * stick it at the end of the procs list) but it will ignored
15837c478bd9Sstevel@tonic-gate 		 * because it will have the PBWAIT flag set.
15847c478bd9Sstevel@tonic-gate 		 */
15857c478bd9Sstevel@tonic-gate 		if (procinset(p, &set) &&
15867c478bd9Sstevel@tonic-gate 		    !(p->p_poolflag & PEXITED) &&
15877c478bd9Sstevel@tonic-gate 		    !(p->p_poolflag & PBWAIT)) {
15887c478bd9Sstevel@tonic-gate 			ASSERT(p->p_child == NULL); /* no child of a child */
15897c478bd9Sstevel@tonic-gate 			procs[procs_count] = p;
15907c478bd9Sstevel@tonic-gate 			procs[procs_count + 1] = NULL;
15917c478bd9Sstevel@tonic-gate 			procs_count++;
15927c478bd9Sstevel@tonic-gate 			p->p_poolflag |= PBWAIT;
15937c478bd9Sstevel@tonic-gate 		}
15947c478bd9Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
15957c478bd9Sstevel@tonic-gate 	}
15967c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
15977c478bd9Sstevel@tonic-gate skip:
15987c478bd9Sstevel@tonic-gate 	/*
15997c478bd9Sstevel@tonic-gate 	 * If there's no processes to rebind then return ESRCH, unless
16007c478bd9Sstevel@tonic-gate 	 * we're associating a pool with new resource set, destroying it,
16017c478bd9Sstevel@tonic-gate 	 * or binding a zone to a pool.
16027c478bd9Sstevel@tonic-gate 	 */
16037c478bd9Sstevel@tonic-gate 	if (procs_count == 0) {
16047c478bd9Sstevel@tonic-gate 		if (idtype == P_POOLID || idtype == P_ZONEID)
16057c478bd9Sstevel@tonic-gate 			rv = 0;
16067c478bd9Sstevel@tonic-gate 		else
16077c478bd9Sstevel@tonic-gate 			rv = ESRCH;
16087c478bd9Sstevel@tonic-gate 		goto out;
16097c478bd9Sstevel@tonic-gate 	}
16107c478bd9Sstevel@tonic-gate 
16117c478bd9Sstevel@tonic-gate #ifdef DEBUG
16127c478bd9Sstevel@tonic-gate 	/*
1613def11082Srh87107 	 * All processes in the array should have PBWAIT set, and none
1614def11082Srh87107 	 * should be in the critical section. Thus, although p_poolflag
1615def11082Srh87107 	 * and p_poolcnt are protected by p_lock, their ASSERTions below
1616def11082Srh87107 	 * should be stable without it. procinset(), however, ASSERTs that
1617def11082Srh87107 	 * the p_lock is held upon entry.
16187c478bd9Sstevel@tonic-gate 	 */
16197c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
1620def11082Srh87107 		int in_set;
1621def11082Srh87107 
1622def11082Srh87107 		mutex_enter(&p->p_lock);
1623def11082Srh87107 		in_set = procinset(p, &set);
1624def11082Srh87107 		mutex_exit(&p->p_lock);
1625def11082Srh87107 
1626def11082Srh87107 		ASSERT(in_set);
16277c478bd9Sstevel@tonic-gate 		ASSERT(p->p_poolflag & PBWAIT);
16287c478bd9Sstevel@tonic-gate 		ASSERT(p->p_poolcnt == 0);
16297c478bd9Sstevel@tonic-gate 	}
16307c478bd9Sstevel@tonic-gate #endif
16317c478bd9Sstevel@tonic-gate 
16327c478bd9Sstevel@tonic-gate 	/*
16337c478bd9Sstevel@tonic-gate 	 * Do the check if processor set rebinding is going to succeed or not.
16347c478bd9Sstevel@tonic-gate 	 */
16357c478bd9Sstevel@tonic-gate 	if ((flags & POOL_BIND_PSET) &&
16367c478bd9Sstevel@tonic-gate 	    (rv = pset_bind_start(procs, pool)) != 0) {
16377c478bd9Sstevel@tonic-gate 		pool_bind_wakeall(procs);
16387c478bd9Sstevel@tonic-gate 		goto out;
16397c478bd9Sstevel@tonic-gate 	}
16407c478bd9Sstevel@tonic-gate 
16417c478bd9Sstevel@tonic-gate 	/*
16427c478bd9Sstevel@tonic-gate 	 * At this point, all bind operations should succeed.
16437c478bd9Sstevel@tonic-gate 	 */
16447c478bd9Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
16457c478bd9Sstevel@tonic-gate 		if (flags & POOL_BIND_PSET) {
16467c478bd9Sstevel@tonic-gate 			psetid_t psetid = pool->pool_pset->pset_id;
16477c478bd9Sstevel@tonic-gate 			void *zonebuf;
16487c478bd9Sstevel@tonic-gate 			void *projbuf;
16497c478bd9Sstevel@tonic-gate 
16507c478bd9Sstevel@tonic-gate 			/*
16517c478bd9Sstevel@tonic-gate 			 * Pre-allocate one buffer for FSS (per-project
16527c478bd9Sstevel@tonic-gate 			 * buffer for a new pset) in case if this is the
16537c478bd9Sstevel@tonic-gate 			 * first thread from its current project getting
16547c478bd9Sstevel@tonic-gate 			 * bound to this processor set.
16557c478bd9Sstevel@tonic-gate 			 */
16567c478bd9Sstevel@tonic-gate 			projbuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_PROJ);
16577c478bd9Sstevel@tonic-gate 			zonebuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_ZONE);
16587c478bd9Sstevel@tonic-gate 
16597c478bd9Sstevel@tonic-gate 			mutex_enter(&pidlock);
16607c478bd9Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
16617c478bd9Sstevel@tonic-gate 			pool_pset_bind(p, psetid, projbuf, zonebuf);
16627c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
16637c478bd9Sstevel@tonic-gate 			mutex_exit(&pidlock);
16647c478bd9Sstevel@tonic-gate 			/*
16657c478bd9Sstevel@tonic-gate 			 * Free buffers pre-allocated above if it
16667c478bd9Sstevel@tonic-gate 			 * wasn't actually used.
16677c478bd9Sstevel@tonic-gate 			 */
16687c478bd9Sstevel@tonic-gate 			fss_freebuf(projbuf, FSS_ALLOC_PROJ);
16697c478bd9Sstevel@tonic-gate 			fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
16707c478bd9Sstevel@tonic-gate 		}
16717c478bd9Sstevel@tonic-gate 		/*
16727c478bd9Sstevel@tonic-gate 		 * Now let's change the scheduling class of this
16737c478bd9Sstevel@tonic-gate 		 * process if our target pool has it defined.
16747c478bd9Sstevel@tonic-gate 		 */
16757c478bd9Sstevel@tonic-gate 		if (cid != POOL_CLASS_UNSET)
16767c478bd9Sstevel@tonic-gate 			pool_change_class(p, cid);
16777c478bd9Sstevel@tonic-gate 
16787c478bd9Sstevel@tonic-gate 		/*
16797c478bd9Sstevel@tonic-gate 		 * It is safe to reference p_pool here without holding
16807c478bd9Sstevel@tonic-gate 		 * p_lock because it cannot change underneath of us.
16817c478bd9Sstevel@tonic-gate 		 * We're holding pool_lock here, so nobody else can be
16827c478bd9Sstevel@tonic-gate 		 * moving this process between pools.  If process "p"
16837c478bd9Sstevel@tonic-gate 		 * would be exiting, we're guaranteed that it would be blocked
16847c478bd9Sstevel@tonic-gate 		 * at pool_barrier_enter() in exit().  Otherwise, it would've
16857c478bd9Sstevel@tonic-gate 		 * been skipped by one of our scans of the practive list
16867c478bd9Sstevel@tonic-gate 		 * as a process with PEXITED flag set.
16877c478bd9Sstevel@tonic-gate 		 */
16887c478bd9Sstevel@tonic-gate 		if (p->p_pool != pool) {
16897c478bd9Sstevel@tonic-gate 			ASSERT(p->p_pool->pool_ref > 0);
1690*1a5e258fSJosef 'Jeff' Sipek 			atomic_dec_32(&p->p_pool->pool_ref);
16917c478bd9Sstevel@tonic-gate 			p->p_pool = pool;
1692*1a5e258fSJosef 'Jeff' Sipek 			atomic_inc_32(&p->p_pool->pool_ref);
16937c478bd9Sstevel@tonic-gate 		}
16947c478bd9Sstevel@tonic-gate 		/*
16957c478bd9Sstevel@tonic-gate 		 * Okay, we've tortured this guy enough.
16967c478bd9Sstevel@tonic-gate 		 * Let this poor process go now.
16977c478bd9Sstevel@tonic-gate 		 */
16987c478bd9Sstevel@tonic-gate 		pool_bind_wake(p);
16997c478bd9Sstevel@tonic-gate 	}
17007c478bd9Sstevel@tonic-gate 	if (flags & POOL_BIND_PSET)
17017c478bd9Sstevel@tonic-gate 		pset_bind_finish();
17027c478bd9Sstevel@tonic-gate 
17037c478bd9Sstevel@tonic-gate out:	switch (idtype) {
17047c478bd9Sstevel@tonic-gate 	case P_PROJID:
17057c478bd9Sstevel@tonic-gate 		ASSERT(kpj != NULL);
17067c478bd9Sstevel@tonic-gate 		mutex_exit(&kpj->kpj_poolbind);
17077c478bd9Sstevel@tonic-gate 		project_rele(kpj);
17087c478bd9Sstevel@tonic-gate 		break;
17097c478bd9Sstevel@tonic-gate 	case P_ZONEID:
17107c478bd9Sstevel@tonic-gate 		if (rv == 0) {
17117c478bd9Sstevel@tonic-gate 			mutex_enter(&cpu_lock);
17127c478bd9Sstevel@tonic-gate 			zone_pool_set(zone, pool);
17137c478bd9Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
17147c478bd9Sstevel@tonic-gate 		}
17157c478bd9Sstevel@tonic-gate 		zone->zone_pool_mod = gethrtime();
17167c478bd9Sstevel@tonic-gate 		zone_rele(zone);
17177c478bd9Sstevel@tonic-gate 		break;
17187c478bd9Sstevel@tonic-gate 	}
17197c478bd9Sstevel@tonic-gate 
17207c478bd9Sstevel@tonic-gate 	kmem_free(procs, procs_size * sizeof (proc_t *));
17217c478bd9Sstevel@tonic-gate 	ASSERT(pool_barrier_count == 0);
17227c478bd9Sstevel@tonic-gate 	return (rv);
17237c478bd9Sstevel@tonic-gate }
17240dc2366fSVenugopal Iyer 
17250dc2366fSVenugopal Iyer void
pool_event_cb_register(pool_event_cb_t * cb)17260dc2366fSVenugopal Iyer pool_event_cb_register(pool_event_cb_t *cb)
17270dc2366fSVenugopal Iyer {
17280dc2366fSVenugopal Iyer 	ASSERT(!pool_lock_held() || panicstr);
17290dc2366fSVenugopal Iyer 	ASSERT(cb->pec_func != NULL);
17300dc2366fSVenugopal Iyer 
17310dc2366fSVenugopal Iyer 	mutex_enter(&pool_event_cb_lock);
17320dc2366fSVenugopal Iyer 	if (!pool_event_cb_init) {
17330dc2366fSVenugopal Iyer 		list_create(&pool_event_cb_list,  sizeof (pool_event_cb_t),
17340dc2366fSVenugopal Iyer 		    offsetof(pool_event_cb_t, pec_list));
17350dc2366fSVenugopal Iyer 		pool_event_cb_init = B_TRUE;
17360dc2366fSVenugopal Iyer 	}
17370dc2366fSVenugopal Iyer 	list_insert_tail(&pool_event_cb_list, cb);
17380dc2366fSVenugopal Iyer 	mutex_exit(&pool_event_cb_lock);
17390dc2366fSVenugopal Iyer }
17400dc2366fSVenugopal Iyer 
17410dc2366fSVenugopal Iyer void
pool_event_cb_unregister(pool_event_cb_t * cb)17420dc2366fSVenugopal Iyer pool_event_cb_unregister(pool_event_cb_t *cb)
17430dc2366fSVenugopal Iyer {
17440dc2366fSVenugopal Iyer 	ASSERT(!pool_lock_held() || panicstr);
17450dc2366fSVenugopal Iyer 
17460dc2366fSVenugopal Iyer 	mutex_enter(&pool_event_cb_lock);
17470dc2366fSVenugopal Iyer 	list_remove(&pool_event_cb_list, cb);
17480dc2366fSVenugopal Iyer 	mutex_exit(&pool_event_cb_lock);
17490dc2366fSVenugopal Iyer }
17500dc2366fSVenugopal Iyer 
17510dc2366fSVenugopal Iyer typedef struct {
17520dc2366fSVenugopal Iyer 	pool_event_t	tqd_what;
17530dc2366fSVenugopal Iyer 	poolid_t	tqd_id;
17540dc2366fSVenugopal Iyer } pool_tqd_t;
17550dc2366fSVenugopal Iyer 
17560dc2366fSVenugopal Iyer void
pool_event_notify(void * arg)17570dc2366fSVenugopal Iyer pool_event_notify(void *arg)
17580dc2366fSVenugopal Iyer {
17590dc2366fSVenugopal Iyer 	pool_tqd_t	*tqd = (pool_tqd_t *)arg;
17600dc2366fSVenugopal Iyer 	pool_event_cb_t	*cb;
17610dc2366fSVenugopal Iyer 
17620dc2366fSVenugopal Iyer 	ASSERT(!pool_lock_held() || panicstr);
17630dc2366fSVenugopal Iyer 
17640dc2366fSVenugopal Iyer 	mutex_enter(&pool_event_cb_lock);
17650dc2366fSVenugopal Iyer 	for (cb = list_head(&pool_event_cb_list); cb != NULL;
17660dc2366fSVenugopal Iyer 	    cb = list_next(&pool_event_cb_list, cb)) {
17670dc2366fSVenugopal Iyer 		cb->pec_func(tqd->tqd_what, tqd->tqd_id, cb->pec_arg);
17680dc2366fSVenugopal Iyer 	}
17690dc2366fSVenugopal Iyer 	mutex_exit(&pool_event_cb_lock);
17700dc2366fSVenugopal Iyer 	kmem_free(tqd, sizeof (*tqd));
17710dc2366fSVenugopal Iyer }
17720dc2366fSVenugopal Iyer 
17730dc2366fSVenugopal Iyer void
pool_event_dispatch(pool_event_t what,poolid_t id)17740dc2366fSVenugopal Iyer pool_event_dispatch(pool_event_t what, poolid_t id)
17750dc2366fSVenugopal Iyer {
17760dc2366fSVenugopal Iyer 	pool_tqd_t *tqd = NULL;
17770dc2366fSVenugopal Iyer 
17780dc2366fSVenugopal Iyer 	ASSERT(pool_lock_held());
17790dc2366fSVenugopal Iyer 
17800dc2366fSVenugopal Iyer 	if (pool_event_cb_taskq == NULL) {
17810dc2366fSVenugopal Iyer 		pool_event_cb_taskq = taskq_create("pool_event_cb_taskq", 1,
17820dc2366fSVenugopal Iyer 		    -1, 1, 1, TASKQ_PREPOPULATE);
17830dc2366fSVenugopal Iyer 	}
17840dc2366fSVenugopal Iyer 
17850dc2366fSVenugopal Iyer 	tqd = kmem_alloc(sizeof (*tqd), KM_SLEEP);
17860dc2366fSVenugopal Iyer 	tqd->tqd_what = what;
17870dc2366fSVenugopal Iyer 	tqd->tqd_id = id;
17880dc2366fSVenugopal Iyer 
17890dc2366fSVenugopal Iyer 	(void) taskq_dispatch(pool_event_cb_taskq, pool_event_notify, tqd,
17900dc2366fSVenugopal Iyer 	    KM_SLEEP);
17910dc2366fSVenugopal Iyer }
1792