xref: /titanic_54/usr/src/uts/common/disp/cpupart.c (revision e824d57f8160a27ac5e650005c7a4f037109c2be)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*e824d57fSjohnlev  * Common Development and Distribution License (the "License").
6*e824d57fSjohnlev  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*e824d57fSjohnlev  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/systm.h>
307c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
317c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
327c478bd9Sstevel@tonic-gate #include <sys/thread.h>
337c478bd9Sstevel@tonic-gate #include <sys/disp.h>
347c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
357c478bd9Sstevel@tonic-gate #include <sys/debug.h>
367c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
377c478bd9Sstevel@tonic-gate #include <sys/pset.h>
387c478bd9Sstevel@tonic-gate #include <sys/var.h>
397c478bd9Sstevel@tonic-gate #include <sys/cyclic.h>
407c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
417c478bd9Sstevel@tonic-gate #include <sys/chip.h>
427c478bd9Sstevel@tonic-gate #include <sys/loadavg.h>
437c478bd9Sstevel@tonic-gate #include <sys/class.h>
447c478bd9Sstevel@tonic-gate #include <sys/fss.h>
457c478bd9Sstevel@tonic-gate #include <sys/pool.h>
467c478bd9Sstevel@tonic-gate #include <sys/pool_pset.h>
477c478bd9Sstevel@tonic-gate #include <sys/policy.h>
487c478bd9Sstevel@tonic-gate 
497c478bd9Sstevel@tonic-gate /*
507c478bd9Sstevel@tonic-gate  * Calling pool_lock() protects the pools configuration, which includes
517c478bd9Sstevel@tonic-gate  * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
527c478bd9Sstevel@tonic-gate  * partitions from being created or destroyed while the lock is held.
537c478bd9Sstevel@tonic-gate  * The lock ordering with respect to related locks is:
547c478bd9Sstevel@tonic-gate  *
557c478bd9Sstevel@tonic-gate  *    pool_lock() ---> cpu_lock  --->  pidlock  -->  p_lock
567c478bd9Sstevel@tonic-gate  *
577c478bd9Sstevel@tonic-gate  * Blocking memory allocations may be made while holding "pool_lock"
587c478bd9Sstevel@tonic-gate  * or cpu_lock.
597c478bd9Sstevel@tonic-gate  */
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate /*
627c478bd9Sstevel@tonic-gate  * The cp_default partition is allocated statically, but its lgroup load average
637c478bd9Sstevel@tonic-gate  * (lpl) list is allocated dynamically after kmem subsystem is initialized. This
647c478bd9Sstevel@tonic-gate  * saves some memory since the space allocated reflects the actual number of
657c478bd9Sstevel@tonic-gate  * lgroups supported by the platform. The lgrp facility provides a temporary
667c478bd9Sstevel@tonic-gate  * space to hold lpl information during system bootstrap.
677c478bd9Sstevel@tonic-gate  */
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate cpupart_t		*cp_list_head;
707c478bd9Sstevel@tonic-gate cpupart_t		cp_default;
71*e824d57fSjohnlev struct mach_cpupart	cp_default_mach;
727c478bd9Sstevel@tonic-gate static cpupartid_t	cp_id_next;
737c478bd9Sstevel@tonic-gate uint_t			cp_numparts;
747c478bd9Sstevel@tonic-gate uint_t			cp_numparts_nonempty;
757c478bd9Sstevel@tonic-gate 
767c478bd9Sstevel@tonic-gate /*
777c478bd9Sstevel@tonic-gate  * Need to limit total number of partitions to avoid slowing down the
787c478bd9Sstevel@tonic-gate  * clock code too much.  The clock code traverses the list of
797c478bd9Sstevel@tonic-gate  * partitions and needs to be able to execute in a reasonable amount
807c478bd9Sstevel@tonic-gate  * of time (less than 1/hz seconds).  The maximum is sized based on
817c478bd9Sstevel@tonic-gate  * max_ncpus so it shouldn't be a problem unless there are large
827c478bd9Sstevel@tonic-gate  * numbers of empty partitions.
837c478bd9Sstevel@tonic-gate  */
847c478bd9Sstevel@tonic-gate static uint_t		cp_max_numparts;
857c478bd9Sstevel@tonic-gate 
867c478bd9Sstevel@tonic-gate /*
877c478bd9Sstevel@tonic-gate  * Processor sets and CPU partitions are different but related concepts.
887c478bd9Sstevel@tonic-gate  * A processor set is a user-level abstraction allowing users to create
897c478bd9Sstevel@tonic-gate  * sets of CPUs and bind threads exclusively to those sets.  A CPU
907c478bd9Sstevel@tonic-gate  * partition is a kernel dispatcher object consisting of a set of CPUs
917c478bd9Sstevel@tonic-gate  * and a global dispatch queue.  The processor set abstraction is
927c478bd9Sstevel@tonic-gate  * implemented via a CPU partition, and currently there is a 1-1
937c478bd9Sstevel@tonic-gate  * mapping between processor sets and partitions (excluding the default
947c478bd9Sstevel@tonic-gate  * partition, which is not visible as a processor set).  Hence, the
957c478bd9Sstevel@tonic-gate  * numbering for processor sets and CPU partitions is identical.  This
967c478bd9Sstevel@tonic-gate  * may not always be true in the future, and these macros could become
977c478bd9Sstevel@tonic-gate  * less trivial if we support e.g. a processor set containing multiple
987c478bd9Sstevel@tonic-gate  * CPU partitions.
997c478bd9Sstevel@tonic-gate  */
1007c478bd9Sstevel@tonic-gate #define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
1017c478bd9Sstevel@tonic-gate #define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate /*
1047c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID.
1057c478bd9Sstevel@tonic-gate  */
1067c478bd9Sstevel@tonic-gate static cpupart_t *
1077c478bd9Sstevel@tonic-gate cpupart_find_all(psetid_t psid)
1087c478bd9Sstevel@tonic-gate {
1097c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1107c478bd9Sstevel@tonic-gate 	cpupartid_t cpid = PSTOCP(psid);
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1137c478bd9Sstevel@tonic-gate 
1147c478bd9Sstevel@tonic-gate 	/* default partition not visible as a processor set */
1157c478bd9Sstevel@tonic-gate 	if (psid == CP_DEFAULT)
1167c478bd9Sstevel@tonic-gate 		return (NULL);
1177c478bd9Sstevel@tonic-gate 
1187c478bd9Sstevel@tonic-gate 	if (psid == PS_MYID)
1197c478bd9Sstevel@tonic-gate 		return (curthread->t_cpupart);
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate 	cp = cp_list_head;
1227c478bd9Sstevel@tonic-gate 	do {
1237c478bd9Sstevel@tonic-gate 		if (cp->cp_id == cpid)
1247c478bd9Sstevel@tonic-gate 			return (cp);
1257c478bd9Sstevel@tonic-gate 		cp = cp->cp_next;
1267c478bd9Sstevel@tonic-gate 	} while (cp != cp_list_head);
1277c478bd9Sstevel@tonic-gate 	return (NULL);
1287c478bd9Sstevel@tonic-gate }
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate /*
1317c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID if the processor set
1327c478bd9Sstevel@tonic-gate  * should be visible from the calling zone.
1337c478bd9Sstevel@tonic-gate  */
1347c478bd9Sstevel@tonic-gate cpupart_t *
1357c478bd9Sstevel@tonic-gate cpupart_find(psetid_t psid)
1367c478bd9Sstevel@tonic-gate {
1377c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1407c478bd9Sstevel@tonic-gate 	cp = cpupart_find_all(psid);
1417c478bd9Sstevel@tonic-gate 	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
1427c478bd9Sstevel@tonic-gate 	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
1437c478bd9Sstevel@tonic-gate 			return (NULL);
1447c478bd9Sstevel@tonic-gate 	return (cp);
1457c478bd9Sstevel@tonic-gate }
1467c478bd9Sstevel@tonic-gate 
1477c478bd9Sstevel@tonic-gate static int
1487c478bd9Sstevel@tonic-gate cpupart_kstat_update(kstat_t *ksp, int rw)
1497c478bd9Sstevel@tonic-gate {
1507c478bd9Sstevel@tonic-gate 	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
1517c478bd9Sstevel@tonic-gate 	cpupart_kstat_t *cpksp = ksp->ks_data;
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
1547c478bd9Sstevel@tonic-gate 		return (EACCES);
1557c478bd9Sstevel@tonic-gate 
1567c478bd9Sstevel@tonic-gate 	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
1577c478bd9Sstevel@tonic-gate 	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
1587c478bd9Sstevel@tonic-gate 	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
1597c478bd9Sstevel@tonic-gate 	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
1607c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
1617c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1627c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
1637c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1647c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
1657c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1667c478bd9Sstevel@tonic-gate 	return (0);
1677c478bd9Sstevel@tonic-gate }
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate static void
1707c478bd9Sstevel@tonic-gate cpupart_kstat_create(cpupart_t *cp)
1717c478bd9Sstevel@tonic-gate {
1727c478bd9Sstevel@tonic-gate 	kstat_t *ksp;
1737c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
1747c478bd9Sstevel@tonic-gate 
1757c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate 	/*
1787c478bd9Sstevel@tonic-gate 	 * We have a bit of a chicken-egg problem since this code will
1797c478bd9Sstevel@tonic-gate 	 * get called to create the kstats for CP_DEFAULT before the
1807c478bd9Sstevel@tonic-gate 	 * pools framework gets initialized.  We circumvent the problem
1817c478bd9Sstevel@tonic-gate 	 * by special-casing cp_default.
1827c478bd9Sstevel@tonic-gate 	 */
1837c478bd9Sstevel@tonic-gate 	if (cp != &cp_default && pool_pset_enabled())
1847c478bd9Sstevel@tonic-gate 		zoneid = GLOBAL_ZONEID;
1857c478bd9Sstevel@tonic-gate 	else
1867c478bd9Sstevel@tonic-gate 		zoneid = ALL_ZONES;
1877c478bd9Sstevel@tonic-gate 	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
1887c478bd9Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
1897c478bd9Sstevel@tonic-gate 	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
1907c478bd9Sstevel@tonic-gate 	if (ksp != NULL) {
1917c478bd9Sstevel@tonic-gate 		cpupart_kstat_t *cpksp = ksp->ks_data;
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_updates, "updates",
1947c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1957c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_runnable, "runnable",
1967c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1977c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_waiting, "waiting",
1987c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1997c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
2007c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2017c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
2027c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2037c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
2047c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2057c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
2067c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 		ksp->ks_update = cpupart_kstat_update;
2097c478bd9Sstevel@tonic-gate 		ksp->ks_private = cp;
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 		kstat_install(ksp);
2127c478bd9Sstevel@tonic-gate 	}
2137c478bd9Sstevel@tonic-gate 	cp->cp_kstat = ksp;
2147c478bd9Sstevel@tonic-gate }
2157c478bd9Sstevel@tonic-gate 
/*
 * Initialize the default partition and kpreempt disp queue.
 *
 * Called during boot, before other partitions can exist: cp_default
 * becomes the sole element of the circular cp_list_head list, its
 * kernel-preemption dispatch queue is set empty, and its lpl (lgroup
 * load) list is allocated and seeded from the bootstrap lpl topology.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	/* cp_default is the only partition; the list is circular. */
	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	/* Empty kpreempt queue: no runnable priority yet. */
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	/* Zero the partition load-average history. */
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	CPUSET_ZERO(cp_default.cp_mach->mc_haltset);
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default list of lgrploads
	 */
	cp_default.cp_nlgrploads = lgrp_plat_max_lgrps();
	cp_default.cp_lgrploads = kmem_zalloc(sizeof (lpl_t) *
	    cp_default.cp_nlgrploads, KM_SLEEP);

	/*
	 * The initial lpl topology is created in a special lpl list
	 * lpl_bootstrap. It should be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	/* Each lpl slot is indexed by, and tagged with, its lgroup ID. */
	for (i = 0; i < cp_default.cp_nlgrploads; i++) {
		cp_default.cp_lgrploads[i].lpl_lgrpid = i;
	}
	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
}
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate 
/*
 * Move CPU cp out of its current partition and into newpp.  If forced
 * is non-zero, processor bindings to cp are broken first via
 * cpu_unbind().  Returns 0 on success; EBUSY if bound threads or a
 * bound cyclic prevent the move, or the error from cpu_unbind().
 *
 * Caller must hold cpu_lock.  The actual relinking is done while all
 * other CPUs are paused (pause_cpus), after which threads homed to
 * the old partition's lgroups are rehomed as needed.
 */
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t 	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/* Can't strand bound threads by removing the last CPU. */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (EBUSY);
	}

	if (forced && (ret = cpu_unbind(cp->cpu_id)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 * Retry a few times (with a 1-tick delay) to let
		 * transient bindings drain before giving up.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Update the set of chips being spanned
	 */
	chip_cpu_move_part(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		/* Unlink cp from the old partition's circular CPU list. */
		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		/* cp becomes the sole element of newpp's circular list. */
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(CPUSET_ISNULL(newpp->cp_mach->mc_haltset));
	ASSERT(CPUSET_ISNULL(oldpp->cp_mach->mc_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk thru the active process list to look for
		 * threads that need to have a new home lgroup,
		 * or the last CPU they run on is the same CPU
		 * being moved out of the partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
					t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}
5737c478bd9Sstevel@tonic-gate 
5747c478bd9Sstevel@tonic-gate /*
5757c478bd9Sstevel@tonic-gate  * Check if thread can be moved to a new cpu partition.  Called by
5767c478bd9Sstevel@tonic-gate  * cpupart_move_thread() and pset_bind_start().
5777c478bd9Sstevel@tonic-gate  */
5787c478bd9Sstevel@tonic-gate int
5797c478bd9Sstevel@tonic-gate cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
5807c478bd9Sstevel@tonic-gate {
5817c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
5827c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
5837c478bd9Sstevel@tonic-gate 	ASSERT(cp != NULL);
5847c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 	/*
5877c478bd9Sstevel@tonic-gate 	 * CPU-bound threads can't be moved.
5887c478bd9Sstevel@tonic-gate 	 */
5897c478bd9Sstevel@tonic-gate 	if (!ignore) {
5907c478bd9Sstevel@tonic-gate 		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
5917c478bd9Sstevel@tonic-gate 		    tp->t_weakbound_cpu;
5927c478bd9Sstevel@tonic-gate 		if (boundcpu != NULL && boundcpu->cpu_part != cp)
5937c478bd9Sstevel@tonic-gate 			return (EBUSY);
5947c478bd9Sstevel@tonic-gate 	}
5957c478bd9Sstevel@tonic-gate 	return (0);
5967c478bd9Sstevel@tonic-gate }
5977c478bd9Sstevel@tonic-gate 
/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).  Returns 0 on success, EINVAL if the target partition
 * has no CPUs, or the error from cpupart_movable_thread() (EBUSY for
 * a CPU-bound thread).  Callers must hold cpu_lock, pidlock, and the
 * thread's process p_lock; projbuf and zonebuf are preallocated FSS
 * buffers handed to fss_changepset().
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	/* A thread cannot be moved to a partition with no CPUs. */
	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->\
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
			tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		/*
		 * Force the thread off the old partition's resources: a
		 * running thread surrenders its CPU, and a runnable thread
		 * is dequeued and requeued so the dispatcher places it
		 * according to the new t_cpupart.
		 */
		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	/* Update FSS (fair-share) accounting for the new partition. */
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate /*
6907c478bd9Sstevel@tonic-gate  * This function binds a thread to a partition.  Must be called with the
6917c478bd9Sstevel@tonic-gate  * p_lock of the containing process held (to keep the thread from going
6927c478bd9Sstevel@tonic-gate  * away), and thus also with cpu_lock held (since cpu_lock must be
6937c478bd9Sstevel@tonic-gate  * acquired before p_lock).  If ignore is non-zero, then CPU bindings
6947c478bd9Sstevel@tonic-gate  * should be ignored (this is used when destroying a partition).
6957c478bd9Sstevel@tonic-gate  */
6967c478bd9Sstevel@tonic-gate int
6977c478bd9Sstevel@tonic-gate cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
6987c478bd9Sstevel@tonic-gate     void *zonebuf)
6997c478bd9Sstevel@tonic-gate {
7007c478bd9Sstevel@tonic-gate 	cpupart_t	*newpp;
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7037c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
7047c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7057c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
7067c478bd9Sstevel@tonic-gate 
7077c478bd9Sstevel@tonic-gate 	if (psid == PS_NONE)
7087c478bd9Sstevel@tonic-gate 		newpp = &cp_default;
7097c478bd9Sstevel@tonic-gate 	else {
7107c478bd9Sstevel@tonic-gate 		newpp = cpupart_find(psid);
7117c478bd9Sstevel@tonic-gate 		if (newpp == NULL) {
7127c478bd9Sstevel@tonic-gate 			return (EINVAL);
7137c478bd9Sstevel@tonic-gate 		}
7147c478bd9Sstevel@tonic-gate 	}
7157c478bd9Sstevel@tonic-gate 	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
7167c478bd9Sstevel@tonic-gate }
7177c478bd9Sstevel@tonic-gate 
7187c478bd9Sstevel@tonic-gate 
/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.  The new partition's ID is
 * returned through *psid.  Returns 0 on success, or ENOMEM when
 * cp_max_numparts partitions already exist.  The caller must hold the
 * pool lock.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;
	lgrp_id_t	i;

	ASSERT(pool_lock_held());

	/*
	 * Do all the (possibly blocking) KM_SLEEP allocations before
	 * taking cpu_lock.
	 */
	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_mach = kmem_zalloc(sizeof (struct mach_cpupart), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		/* Partition limit reached; undo the allocations above. */
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp->cp_mach, sizeof (struct mach_cpupart));
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	/* Initialize the new, still-empty partition. */
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	/* -1 marks the kpreempt queue as having no runnable threads. */
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	CPUSET_ZERO(pp->cp_mach->mc_haltset);
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	/* Size the kpreempt queue for every priority in the system. */
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	/* Assign each load structure its lgroup ID. */
	for (i = 0; i < pp->cp_nlgrploads; i++) {
		pp->cp_lgrploads[i].lpl_lgrpid = i;
	}
	CHIP_SET_ZERO(pp->cp_mach->mc_chipset);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	/* Splice the new partition in just before cp_list_head. */
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 
/*
 * Destroy a partition.  All threads bound to the partition are first
 * unbound (moved to the default partition), then each of its CPUs is
 * moved to the default partition, and finally the partition itself is
 * unlinked from the partition list and freed.  The default partition
 * cannot be destroyed (EINVAL).  The caller must hold the pool lock.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;
	void 	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	/*
	 * First need to unbind all the threads currently bound to the
	 * partition.  Then do the actual destroy (which moves the CPUs).
	 * Walk the circular all-threads list starting from curthread.
	 */
	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}
			err = cpupart_bind_thread(t, PS_NONE, 1,
			    projbuf, zonebuf);
			if (err) {
				/*
				 * Unbinding failed; drop all locks, free
				 * the FSS buffers, and report the error.
				 */
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				mutex_exit(&cpu_lock);
				fss_freebuf(projbuf, FSS_ALLOC_PROJ);
				fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
				return (err);
			}
			t->t_bind_pset = PS_NONE;
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);

	/* Drain the partition's CPUs into the default partition. */
	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	/* With no CPUs left, both machine-dependent sets must be empty. */
	ASSERT(CHIP_SET_ISNULL(pp->cp_mach->mc_chipset));
	ASSERT(CPUSET_ISNULL(pp->cp_mach->mc_haltset));

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're turned
	 * online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	/* Let the destroyed partition's ID be reused. */
	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	/* Release everything cpupart_create() allocated. */
	disp_kp_free(&pp->cp_kp_queue);
	kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
	pp->cp_lgrploads = NULL;
	kmem_free(pp->cp_mach, sizeof (struct mach_cpupart));
	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 
9107c478bd9Sstevel@tonic-gate /*
9117c478bd9Sstevel@tonic-gate  * Return the ID of the partition to which the specified processor belongs.
9127c478bd9Sstevel@tonic-gate  */
9137c478bd9Sstevel@tonic-gate psetid_t
9147c478bd9Sstevel@tonic-gate cpupart_query_cpu(cpu_t *cp)
9157c478bd9Sstevel@tonic-gate {
9167c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
9177c478bd9Sstevel@tonic-gate 
9187c478bd9Sstevel@tonic-gate 	return (CPTOPS(cp->cpu_part->cp_id));
9197c478bd9Sstevel@tonic-gate }
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 
9227c478bd9Sstevel@tonic-gate /*
9237c478bd9Sstevel@tonic-gate  * Attach a processor to an existing partition.
9247c478bd9Sstevel@tonic-gate  */
9257c478bd9Sstevel@tonic-gate int
9267c478bd9Sstevel@tonic-gate cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
9277c478bd9Sstevel@tonic-gate {
9287c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
9297c478bd9Sstevel@tonic-gate 	int		err;
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9327c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate 	pp = cpupart_find(psid);
9357c478bd9Sstevel@tonic-gate 	if (pp == NULL)
9367c478bd9Sstevel@tonic-gate 		return (EINVAL);
9377c478bd9Sstevel@tonic-gate 	if (cp->cpu_flags & CPU_OFFLINE)
9387c478bd9Sstevel@tonic-gate 		return (EINVAL);
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 	err = cpupart_move_cpu(cp, pp, forced);
9417c478bd9Sstevel@tonic-gate 	return (err);
9427c478bd9Sstevel@tonic-gate }
9437c478bd9Sstevel@tonic-gate 
9447c478bd9Sstevel@tonic-gate /*
9457c478bd9Sstevel@tonic-gate  * Get a list of cpus belonging to the partition.  If numcpus is NULL,
9467c478bd9Sstevel@tonic-gate  * this just checks for a valid partition.  If numcpus is non-NULL but
9477c478bd9Sstevel@tonic-gate  * cpulist is NULL, the current number of cpus is stored in *numcpus.
9487c478bd9Sstevel@tonic-gate  * If both are non-NULL, the current number of cpus is stored in *numcpus,
9497c478bd9Sstevel@tonic-gate  * and a list of those cpus up to the size originally in *numcpus is
9507c478bd9Sstevel@tonic-gate  * stored in cpulist[].  Also, store the processor set id in *psid.
9517c478bd9Sstevel@tonic-gate  * This is useful in case the processor set id passed in was PS_MYID.
9527c478bd9Sstevel@tonic-gate  */
9537c478bd9Sstevel@tonic-gate int
9547c478bd9Sstevel@tonic-gate cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
9557c478bd9Sstevel@tonic-gate {
9567c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
9577c478bd9Sstevel@tonic-gate 	uint_t		ncpus;
9587c478bd9Sstevel@tonic-gate 	cpu_t		*c;
9597c478bd9Sstevel@tonic-gate 	int		i;
9607c478bd9Sstevel@tonic-gate 
9617c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
9627c478bd9Sstevel@tonic-gate 	pp = cpupart_find(*psid);
9637c478bd9Sstevel@tonic-gate 	if (pp == NULL) {
9647c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
9657c478bd9Sstevel@tonic-gate 		return (EINVAL);
9667c478bd9Sstevel@tonic-gate 	}
9677c478bd9Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
9687c478bd9Sstevel@tonic-gate 	ncpus = pp->cp_ncpus;
9697c478bd9Sstevel@tonic-gate 	if (numcpus) {
9707c478bd9Sstevel@tonic-gate 		if (ncpus > *numcpus) {
9717c478bd9Sstevel@tonic-gate 			/*
9727c478bd9Sstevel@tonic-gate 			 * Only copy as many cpus as were passed in, but
9737c478bd9Sstevel@tonic-gate 			 * pass back the real number.
9747c478bd9Sstevel@tonic-gate 			 */
9757c478bd9Sstevel@tonic-gate 			uint_t t = ncpus;
9767c478bd9Sstevel@tonic-gate 			ncpus = *numcpus;
9777c478bd9Sstevel@tonic-gate 			*numcpus = t;
9787c478bd9Sstevel@tonic-gate 		} else
9797c478bd9Sstevel@tonic-gate 			*numcpus = ncpus;
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate 		if (cpulist) {
9827c478bd9Sstevel@tonic-gate 			c = pp->cp_cpulist;
9837c478bd9Sstevel@tonic-gate 			for (i = 0; i < ncpus; i++) {
9847c478bd9Sstevel@tonic-gate 				ASSERT(c != NULL);
9857c478bd9Sstevel@tonic-gate 				cpulist[i] = c->cpu_id;
9867c478bd9Sstevel@tonic-gate 				c = c->cpu_next_part;
9877c478bd9Sstevel@tonic-gate 			}
9887c478bd9Sstevel@tonic-gate 		}
9897c478bd9Sstevel@tonic-gate 	}
9907c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
9917c478bd9Sstevel@tonic-gate 	return (0);
9927c478bd9Sstevel@tonic-gate }
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate /*
9957c478bd9Sstevel@tonic-gate  * Reallocate kpreempt queues for each CPU partition.  Called from
9967c478bd9Sstevel@tonic-gate  * disp_setup when a new scheduling class is loaded that increases the
9977c478bd9Sstevel@tonic-gate  * number of priorities in the system.
9987c478bd9Sstevel@tonic-gate  */
9997c478bd9Sstevel@tonic-gate void
10007c478bd9Sstevel@tonic-gate cpupart_kpqalloc(pri_t npri)
10017c478bd9Sstevel@tonic-gate {
10027c478bd9Sstevel@tonic-gate 	cpupart_t *cpp;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10057c478bd9Sstevel@tonic-gate 	cpp = cp_list_head;
10067c478bd9Sstevel@tonic-gate 	do {
10077c478bd9Sstevel@tonic-gate 		disp_kp_alloc(&cpp->cp_kp_queue, npri);
10087c478bd9Sstevel@tonic-gate 		cpp = cpp->cp_next;
10097c478bd9Sstevel@tonic-gate 	} while (cpp != cp_list_head);
10107c478bd9Sstevel@tonic-gate }
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate int
10137c478bd9Sstevel@tonic-gate cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
10147c478bd9Sstevel@tonic-gate {
10157c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
10167c478bd9Sstevel@tonic-gate 	int i;
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 	ASSERT(nelem >= 0);
10197c478bd9Sstevel@tonic-gate 	ASSERT(nelem <= LOADAVG_NSTATS);
10207c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10217c478bd9Sstevel@tonic-gate 
10227c478bd9Sstevel@tonic-gate 	cp = cpupart_find(psid);
10237c478bd9Sstevel@tonic-gate 	if (cp == NULL)
10247c478bd9Sstevel@tonic-gate 		return (EINVAL);
10257c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
10267c478bd9Sstevel@tonic-gate 		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);
10277c478bd9Sstevel@tonic-gate 
10287c478bd9Sstevel@tonic-gate 	return (0);
10297c478bd9Sstevel@tonic-gate }
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate 
10327c478bd9Sstevel@tonic-gate uint_t
10337c478bd9Sstevel@tonic-gate cpupart_list(psetid_t *list, uint_t nelem, int flag)
10347c478bd9Sstevel@tonic-gate {
10357c478bd9Sstevel@tonic-gate 	uint_t numpart = 0;
10367c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10397c478bd9Sstevel@tonic-gate 	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	if (list != NULL) {
10427c478bd9Sstevel@tonic-gate 		cp = cp_list_head;
10437c478bd9Sstevel@tonic-gate 		do {
10447c478bd9Sstevel@tonic-gate 			if (((flag == CP_ALL) && (cp != &cp_default)) ||
10457c478bd9Sstevel@tonic-gate 			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
10467c478bd9Sstevel@tonic-gate 				if (numpart == nelem)
10477c478bd9Sstevel@tonic-gate 					break;
10487c478bd9Sstevel@tonic-gate 				list[numpart++] = CPTOPS(cp->cp_id);
10497c478bd9Sstevel@tonic-gate 			}
10507c478bd9Sstevel@tonic-gate 			cp = cp->cp_next;
10517c478bd9Sstevel@tonic-gate 		} while (cp != cp_list_head);
10527c478bd9Sstevel@tonic-gate 	}
10537c478bd9Sstevel@tonic-gate 
10547c478bd9Sstevel@tonic-gate 	ASSERT(numpart < cp_numparts);
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate 	if (flag == CP_ALL)
10577c478bd9Sstevel@tonic-gate 		numpart = cp_numparts - 1; /* leave out default partition */
10587c478bd9Sstevel@tonic-gate 	else if (flag == CP_NONEMPTY)
10597c478bd9Sstevel@tonic-gate 		numpart = cp_numparts_nonempty;
10607c478bd9Sstevel@tonic-gate 
10617c478bd9Sstevel@tonic-gate 	return (numpart);
10627c478bd9Sstevel@tonic-gate }
10637c478bd9Sstevel@tonic-gate 
10647c478bd9Sstevel@tonic-gate int
10657c478bd9Sstevel@tonic-gate cpupart_setattr(psetid_t psid, uint_t attr)
10667c478bd9Sstevel@tonic-gate {
10677c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10707c478bd9Sstevel@tonic-gate 
10717c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
10727c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
10737c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
10747c478bd9Sstevel@tonic-gate 		return (EINVAL);
10757c478bd9Sstevel@tonic-gate 	}
10767c478bd9Sstevel@tonic-gate 	/*
10777c478bd9Sstevel@tonic-gate 	 * PSET_NOESCAPE attribute for default cpu partition is always set
10787c478bd9Sstevel@tonic-gate 	 */
10797c478bd9Sstevel@tonic-gate 	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
10807c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
10817c478bd9Sstevel@tonic-gate 		return (EINVAL);
10827c478bd9Sstevel@tonic-gate 	}
10837c478bd9Sstevel@tonic-gate 	cp->cp_attr = attr;
10847c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
10857c478bd9Sstevel@tonic-gate 	return (0);
10867c478bd9Sstevel@tonic-gate }
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate int
10897c478bd9Sstevel@tonic-gate cpupart_getattr(psetid_t psid, uint_t *attrp)
10907c478bd9Sstevel@tonic-gate {
10917c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
10927c478bd9Sstevel@tonic-gate 
10937c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
10947c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
10957c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
10967c478bd9Sstevel@tonic-gate 		return (EINVAL);
10977c478bd9Sstevel@tonic-gate 	}
10987c478bd9Sstevel@tonic-gate 	*attrp = cp->cp_attr;
10997c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
11007c478bd9Sstevel@tonic-gate 	return (0);
11017c478bd9Sstevel@tonic-gate }
1102