/* xref: /titanic_50/usr/src/uts/common/disp/cpupart.c (revision fb2f18f820d90b001aea4fb27dd654bc1263c440) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
#include <sys/pghw.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/policy.h>

/*
 * Calling pool_lock() protects the pools configuration, which includes
 * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
 * partitions from being created or destroyed while the lock is held.
 * The lock ordering with respect to related locks is:
 *
 *    pool_lock() ---> cpu_lock  --->  pidlock  --->  p_lock
 *
 * Blocking memory allocations may be made while holding "pool_lock"
 * or cpu_lock.
 */
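
/*
 * Illustrative sketch (not part of the original source): a caller that
 * follows the ordering above, such as pool/pset code rebinding a thread,
 * would nest its acquisitions like this (mirroring the locks asserted in
 * cpupart_bind_thread() below):
 *
 *	pool_lock();
 *	mutex_enter(&cpu_lock);
 *	mutex_enter(&pidlock);
 *	mutex_enter(&p->p_lock);
 *	... operate on the thread/partition ...
 *	mutex_exit(&p->p_lock);
 *	mutex_exit(&pidlock);
 *	mutex_exit(&cpu_lock);
 *	pool_unlock();
 */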

/*
 * The cp_default partition is allocated statically, but its lgroup load
 * average (lpl) list is allocated dynamically after the kmem subsystem is
 * initialized.  This saves some memory, since the space allocated reflects
 * the actual number of lgroups supported by the platform.  The lgrp facility
 * provides temporary space to hold lpl information during system bootstrap.
 */

cpupart_t		*cp_list_head;
cpupart_t		cp_default;
struct mach_cpupart	cp_default_mach;
static cpupartid_t	cp_id_next;
uint_t			cp_numparts;
uint_t			cp_numparts_nonempty;

/*
 * Need to limit total number of partitions to avoid slowing down the
 * clock code too much.  The clock code traverses the list of
 * partitions and needs to be able to execute in a reasonable amount
 * of time (less than 1/hz seconds).  The maximum is sized based on
 * max_ncpus so it shouldn't be a problem unless there are large
 * numbers of empty partitions.
 */
static uint_t		cp_max_numparts;
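
/*
 * Illustrative example (not part of the original source): since
 * cpupart_initialize_default() only computes a default limit when this
 * variable is still zero, the limit can be raised from /etc/system, e.g.:
 *
 *	set cp_max_numparts = 0x40
 *
 * The value 0x40 is an arbitrary example.
 */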

/*
 * Processor sets and CPU partitions are different but related concepts.
 * A processor set is a user-level abstraction allowing users to create
 * sets of CPUs and bind threads exclusively to those sets.  A CPU
 * partition is a kernel dispatcher object consisting of a set of CPUs
 * and a global dispatch queue.  The processor set abstraction is
 * implemented via a CPU partition, and currently there is a 1-1
 * mapping between processor sets and partitions (excluding the default
 * partition, which is not visible as a processor set).  Hence, the
 * numbering for processor sets and CPU partitions is identical.  This
 * may not always be true in the future, and these macros could become
 * less trivial if we support e.g. a processor set containing multiple
 * CPU partitions.
 */
#define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
#define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
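
/*
 * Illustrative examples (not part of the original source) of the identity
 * mapping implemented by the macros above:
 *
 *	PSTOCP(PS_NONE) == CP_DEFAULT		CPTOPS(CP_DEFAULT) == PS_NONE
 *	PSTOCP(1) == 1				CPTOPS(1) == 1
 */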

/*
 * Find a CPU partition given a processor set ID.
 */
static cpupart_t *
cpupart_find_all(psetid_t psid)
{
	cpupart_t *cp;
	cpupartid_t cpid = PSTOCP(psid);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* default partition not visible as a processor set */
	if (psid == CP_DEFAULT)
		return (NULL);

	if (psid == PS_MYID)
		return (curthread->t_cpupart);

	cp = cp_list_head;
	do {
		if (cp->cp_id == cpid)
			return (cp);
		cp = cp->cp_next;
	} while (cp != cp_list_head);
	return (NULL);
}

/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
			return (NULL);
	return (cp);
}

static int
cpupart_kstat_update(kstat_t *ksp, int rw)
{
	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
	cpupart_kstat_t *cpksp = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
	    (16 - FSHIFT);
	return (0);
}
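
/*
 * Note added for clarity (not part of the original source): cp_hp_avenrun[]
 * is kept in high-precision fixed point with 16 fractional bits, while the
 * kstat consumers expect FSHIFT fractional bits, hence the ">> (16 - FSHIFT)"
 * conversions above.  For example, assuming FSHIFT is 8, a 1-minute load
 * average of 1.0 stored as 0x10000 is exported as 0x100.
 */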

static void
cpupart_kstat_create(cpupart_t *cp)
{
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * We have a bit of a chicken-egg problem since this code will
	 * get called to create the kstats for CP_DEFAULT before the
	 * pools framework gets initialized.  We circumvent the problem
	 * by special-casing cp_default.
	 */
	if (cp != &cp_default && pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
	if (ksp != NULL) {
		cpupart_kstat_t *cpksp = ksp->ks_data;

		kstat_named_init(&cpksp->cpk_updates, "updates",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_runnable, "runnable",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_waiting, "waiting",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
		    KSTAT_DATA_UINT32);

		ksp->ks_update = cpupart_kstat_update;
		ksp->ks_private = cp;

		kstat_install(ksp);
	}
	cp->cp_kstat = ksp;
}

/*
 * Initialize the default partition and kpreempt disp queue.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	CPUSET_ZERO(cp_default.cp_mach->mc_haltset);
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default's list of lgrploads.
	 */
	cp_default.cp_nlgrploads = lgrp_plat_max_lgrps();
	cp_default.cp_lgrploads = kmem_zalloc(sizeof (lpl_t) *
	    cp_default.cp_nlgrploads, KM_SLEEP);

	/*
	 * The initial lpl topology is created in a special lpl list,
	 * lpl_bootstrap, and must be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0's cpu_lpl pointer to
	 *	 point to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	for (i = 0; i < cp_default.cp_nlgrploads; i++) {
		cp_default.cp_lgrploads[i].lpl_lgrpid = i;
	}
	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];

	bitset_init(&cp_default.cp_cmt_pgs);
}


static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (EBUSY);
	}

	if (forced && (ret = cpu_unbind(cp->cpu_id)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads from weakly binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions. This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(CPUSET_ISNULL(newpp->cp_mach->mc_haltset));
	ASSERT(CPUSET_ISNULL(oldpp->cp_mach->mc_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk through the active process list to look for
		 * threads that need to have a new home lgroup, or
		 * that last ran on the CPU being moved out of the
		 * partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
					t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * If we didn't find any threads in the same lgroup
			 * as this CPU with a different lpl, then remove the
			 * lgroup from the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}

/*
 * Check if thread can be moved to a new cpu partition.  Called by
 * cpupart_move_thread() and pset_bind_start().
 */
int
cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(cp != NULL);
	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * CPU-bound threads can't be moved.
	 */
	if (!ignore) {
		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
		    tp->t_weakbound_cpu;
		if (boundcpu != NULL && boundcpu->cpu_part != cp)
			return (EBUSY);
	}
	return (0);
}

/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->\
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
			tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}


/*
 * This function binds a thread to a partition.  Must be called with the
 * p_lock of the containing process held (to keep the thread from going
 * away), and thus also with cpu_lock held (since cpu_lock must be
 * acquired before p_lock).  If ignore is non-zero, then CPU bindings
 * should be ignored (this is used when destroying a partition).
 */
int
cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
    void *zonebuf)
{
	cpupart_t	*newpp;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	if (psid == PS_NONE)
		newpp = &cp_default;
	else {
		newpp = cpupart_find(psid);
		if (newpp == NULL) {
			return (EINVAL);
		}
	}
	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
}


/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;
	lgrp_id_t	i;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_mach = kmem_zalloc(sizeof (struct mach_cpupart), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp->cp_mach, sizeof (struct mach_cpupart));
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	CPUSET_ZERO(pp->cp_mach->mc_haltset);
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	for (i = 0; i < pp->cp_nlgrploads; i++) {
		pp->cp_lgrploads[i].lpl_lgrpid = i;
	}
	bitset_init(&pp->cp_cmt_pgs);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}


/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;
	void	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	/*
	 * First need to unbind all the threads currently bound to the
	 * partition.  Then do the actual destroy (which moves the CPUs).
	 */
	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}
			err = cpupart_bind_thread(t, PS_NONE, 1,
			    projbuf, zonebuf);
			if (err) {
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				mutex_exit(&cpu_lock);
				fss_freebuf(projbuf, FSS_ALLOC_PROJ);
				fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
				return (err);
			}
			t->t_bind_pset = PS_NONE;
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
	ASSERT(CPUSET_ISNULL(pp->cp_mach->mc_haltset));

	/*
	 * Tear down the partition's group of active CMT PGs now that
	 * all of the CPUs have left.
	 */
	bitset_fini(&pp->cp_cmt_pgs);

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're brought
	 * back online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);
	kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
	pp->cp_lgrploads = NULL;
	kmem_free(pp->cp_mach, sizeof (struct mach_cpupart));
	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}


/*
 * Return the ID of the partition to which the specified processor belongs.
 */
psetid_t
cpupart_query_cpu(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (CPTOPS(cp->cpu_part->cp_id));
}


/*
 * Attach a processor to an existing partition.
 */
int
cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
{
	cpupart_t	*pp;
	int		err;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	pp = cpupart_find(psid);
	if (pp == NULL)
		return (EINVAL);
	if (cp->cpu_flags & CPU_OFFLINE)
		return (EINVAL);

	err = cpupart_move_cpu(cp, pp, forced);
	return (err);
}

/*
 * Get a list of cpus belonging to the partition.  If numcpus is NULL,
 * this just checks for a valid partition.  If numcpus is non-NULL but
 * cpulist is NULL, the current number of cpus is stored in *numcpus.
 * If both are non-NULL, the current number of cpus is stored in *numcpus,
 * and a list of those cpus up to the size originally in *numcpus is
 * stored in cpulist[].  Also, store the processor set id in *psid.
 * This is useful in case the processor set id passed in was PS_MYID.
 */
int
cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
{
	cpupart_t	*pp;
	uint_t		ncpus;
	cpu_t		*c;
	int		i;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(*psid);
	if (pp == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*psid = CPTOPS(pp->cp_id);
	ncpus = pp->cp_ncpus;
	if (numcpus) {
		if (ncpus > *numcpus) {
			/*
			 * Only copy as many cpus as were passed in, but
			 * pass back the real number.
			 */
			uint_t t = ncpus;
			ncpus = *numcpus;
			*numcpus = t;
		} else
			*numcpus = ncpus;

		if (cpulist) {
			c = pp->cp_cpulist;
			for (i = 0; i < ncpus; i++) {
				ASSERT(c != NULL);
				cpulist[i] = c->cpu_id;
				c = c->cpu_next_part;
			}
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
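
/*
 * Illustrative caller sketch for cpupart_get_cpus() above (not part of the
 * original source).  A hypothetical in-kernel caller could size its buffer
 * with a first call and then fetch the list with a second call; note that
 * the count may change between the two calls, since cpu_lock is dropped.
 *
 *	psetid_t psid = PS_MYID;
 *	uint_t ncpus = 0;
 *	processorid_t *cpus;
 *
 *	(void) cpupart_get_cpus(&psid, NULL, &ncpus);
 *	cpus = kmem_alloc(ncpus * sizeof (processorid_t), KM_SLEEP);
 *	(void) cpupart_get_cpus(&psid, cpus, &ncpus);
 */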

/*
 * Reallocate kpreempt queues for each CPU partition.  Called from
 * disp_setup when a new scheduling class is loaded that increases the
 * number of priorities in the system.
 */
void
cpupart_kpqalloc(pri_t npri)
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpp = cp_list_head;
	do {
		disp_kp_alloc(&cpp->cp_kp_queue, npri);
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}

int
cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
{
	cpupart_t *cp;
	int i;

	ASSERT(nelem >= 0);
	ASSERT(nelem <= LOADAVG_NSTATS);
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpupart_find(psid);
	if (cp == NULL)
		return (EINVAL);
	for (i = 0; i < nelem; i++)
		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);

	return (0);
}


uint_t
cpupart_list(psetid_t *list, uint_t nelem, int flag)
{
	uint_t numpart = 0;
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);

	if (list != NULL) {
		cp = cp_list_head;
		do {
			if (((flag == CP_ALL) && (cp != &cp_default)) ||
			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
				if (numpart == nelem)
					break;
				list[numpart++] = CPTOPS(cp->cp_id);
			}
			cp = cp->cp_next;
		} while (cp != cp_list_head);
	}

	ASSERT(numpart < cp_numparts);

	if (flag == CP_ALL)
		numpart = cp_numparts - 1; /* leave out default partition */
	else if (flag == CP_NONEMPTY)
		numpart = cp_numparts_nonempty;

	return (numpart);
}

int
cpupart_setattr(psetid_t psid, uint_t attr)
{
	cpupart_t *cp;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	/*
	 * The PSET_NOESCAPE attribute is always set for the default
	 * cpu partition.
	 */
	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	cp->cp_attr = attr;
	mutex_exit(&cpu_lock);
	return (0);
}

int
cpupart_getattr(psetid_t psid, uint_t *attrp)
{
	cpupart_t *cp;

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*attrp = cp->cp_attr;
	mutex_exit(&cpu_lock);
	return (0);
}