xref: /titanic_50/usr/src/uts/common/disp/cmt.c (revision fb2f18f820d90b001aea4fb27dd654bc1263c440)
1*fb2f18f8Sesaxe /*
2*fb2f18f8Sesaxe  * CDDL HEADER START
3*fb2f18f8Sesaxe  *
4*fb2f18f8Sesaxe  * The contents of this file are subject to the terms of the
5*fb2f18f8Sesaxe  * Common Development and Distribution License (the "License").
6*fb2f18f8Sesaxe  * You may not use this file except in compliance with the License.
7*fb2f18f8Sesaxe  *
8*fb2f18f8Sesaxe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*fb2f18f8Sesaxe  * or http://www.opensolaris.org/os/licensing.
10*fb2f18f8Sesaxe  * See the License for the specific language governing permissions
11*fb2f18f8Sesaxe  * and limitations under the License.
12*fb2f18f8Sesaxe  *
13*fb2f18f8Sesaxe  * When distributing Covered Code, include this CDDL HEADER in each
14*fb2f18f8Sesaxe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*fb2f18f8Sesaxe  * If applicable, add the following below this CDDL HEADER, with the
16*fb2f18f8Sesaxe  * fields enclosed by brackets "[]" replaced with your own identifying
17*fb2f18f8Sesaxe  * information: Portions Copyright [yyyy] [name of copyright owner]
18*fb2f18f8Sesaxe  *
19*fb2f18f8Sesaxe  * CDDL HEADER END
20*fb2f18f8Sesaxe  */
21*fb2f18f8Sesaxe /*
22*fb2f18f8Sesaxe  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*fb2f18f8Sesaxe  * Use is subject to license terms.
24*fb2f18f8Sesaxe  */
25*fb2f18f8Sesaxe 
26*fb2f18f8Sesaxe #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*fb2f18f8Sesaxe 
28*fb2f18f8Sesaxe #include <sys/systm.h>
29*fb2f18f8Sesaxe #include <sys/types.h>
30*fb2f18f8Sesaxe #include <sys/param.h>
31*fb2f18f8Sesaxe #include <sys/thread.h>
32*fb2f18f8Sesaxe #include <sys/cpuvar.h>
33*fb2f18f8Sesaxe #include <sys/cpupart.h>
34*fb2f18f8Sesaxe #include <sys/kmem.h>
35*fb2f18f8Sesaxe #include <sys/cmn_err.h>
36*fb2f18f8Sesaxe #include <sys/kstat.h>
37*fb2f18f8Sesaxe #include <sys/processor.h>
38*fb2f18f8Sesaxe #include <sys/disp.h>
39*fb2f18f8Sesaxe #include <sys/group.h>
40*fb2f18f8Sesaxe #include <sys/pghw.h>
41*fb2f18f8Sesaxe #include <sys/bitset.h>
42*fb2f18f8Sesaxe #include <sys/lgrp.h>
43*fb2f18f8Sesaxe #include <sys/cmt.h>
44*fb2f18f8Sesaxe 
45*fb2f18f8Sesaxe /*
46*fb2f18f8Sesaxe  * CMT scheduler / dispatcher support
47*fb2f18f8Sesaxe  *
48*fb2f18f8Sesaxe  * This file implements CMT scheduler support using Processor Groups.
49*fb2f18f8Sesaxe  * The CMT processor group class creates and maintains the CMT class
50*fb2f18f8Sesaxe  * specific processor group pg_cmt_t.
51*fb2f18f8Sesaxe  *
52*fb2f18f8Sesaxe  * ---------------------------- <-- pg_cmt_t *
53*fb2f18f8Sesaxe  * | pghw_t                   |
54*fb2f18f8Sesaxe  * ----------------------------
55*fb2f18f8Sesaxe  * | CMT class specific data  |
56*fb2f18f8Sesaxe  * | - hierarchy linkage      |
57*fb2f18f8Sesaxe  * | - CMT load balancing data|
58*fb2f18f8Sesaxe  * | - active CPU group/bitset|
59*fb2f18f8Sesaxe  * ----------------------------
60*fb2f18f8Sesaxe  *
61*fb2f18f8Sesaxe  * The scheduler/dispatcher leverages knowledge of the performance
62*fb2f18f8Sesaxe  * relevant CMT sharing relationships existing between cpus to implement
63*fb2f18f8Sesaxe  * optimized affinity and load balancing policies.
64*fb2f18f8Sesaxe  *
65*fb2f18f8Sesaxe  * Load balancing policy seeks to improve performance by minimizing
66*fb2f18f8Sesaxe  * contention over shared processor resources / facilities, while the
67*fb2f18f8Sesaxe  * affinity policies seek to improve cache and TLB utilization.
68*fb2f18f8Sesaxe  *
69*fb2f18f8Sesaxe  * The CMT PGs created by this class are already arranged into a
70*fb2f18f8Sesaxe  * hierarchy (which is done in the pghw layer). To implement the top-down
71*fb2f18f8Sesaxe  * CMT load balancing algorithm, the CMT PGs additionally maintain
72*fb2f18f8Sesaxe  * parent, child and sibling hierarchy relationships.
73*fb2f18f8Sesaxe  * Parent PGs always contain a superset of their children(s) resources,
74*fb2f18f8Sesaxe  * each PG can have at most one parent, and siblings are the group of PGs
75*fb2f18f8Sesaxe  * sharing the same parent.
76*fb2f18f8Sesaxe  *
77*fb2f18f8Sesaxe  * On NUMA systems, the CMT load balancing algorithm balances across the
78*fb2f18f8Sesaxe  * CMT PGs within their respective lgroups. On UMA based system, there
79*fb2f18f8Sesaxe  * exists a top level group of PGs to balance across. On NUMA systems multiple
80*fb2f18f8Sesaxe  * top level groups are instantiated, where the top level balancing begins by
 81*fb2f18f8Sesaxe  * balancing across the CMT PGs within their respective (per lgroup) top level
82*fb2f18f8Sesaxe  * groups.
83*fb2f18f8Sesaxe  */
84*fb2f18f8Sesaxe 
/*
 * Per-lgroup CMT bookkeeping. Each lgroup seen by this class gets one of
 * these, anchoring the top level group of CMT PGs across which the
 * highest level of CMT load balancing is performed.
 */
typedef struct cmt_lgrp {
	group_t		cl_pgs;		/* Top level group of active CMT PGs */
	int		cl_npgs;	/* # of top level PGs in the lgroup */
	lgrp_handle_t	cl_hand;	/* lgroup's platform handle */
	struct cmt_lgrp *cl_next;	/* next cmt_lgrp */
} cmt_lgrp_t;
91*fb2f18f8Sesaxe 
static cmt_lgrp_t	*cmt_lgrps = NULL;	/* linked list of all cmt_lgrps */

static int		is_cpu0 = 1;		/* nonzero until the first CPU */
						/* has been through cpu_init */
static int		cmt_sched_disabled = 0;	/* set to disable CMT class */

static pg_cid_t		pg_cmt_class_id;		/* PG class id */
98*fb2f18f8Sesaxe 
/*
 * Forward declarations for the CMT class callbacks (wired into
 * pg_ops_cmt below) and internal helpers.
 */
static pg_t		*pg_cmt_alloc();
static void		pg_cmt_free(pg_t *);
static void		pg_cmt_cpu_init(cpu_t *);
static void		pg_cmt_cpu_fini(cpu_t *);
static void		pg_cmt_cpu_active(cpu_t *);
static void		pg_cmt_cpu_inactive(cpu_t *);
static void		pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
static void		pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
static void		pg_cmt_hier_pack(pg_cmt_t **, int);
static int		pg_cmt_cpu_belongs(pg_t *, cpu_t *);
static int		pg_cmt_hw(pghw_type_t);
static cmt_lgrp_t	*pg_cmt_find_lgrp(lgrp_handle_t);
111*fb2f18f8Sesaxe 
/*
 * Macro to test if PG is managed by the CMT PG class, by comparing
 * the PG's class id against the id registered in pg_cmt_class_init()
 */
#define	IS_CMT_PG(pg)	(((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)
116*fb2f18f8Sesaxe 
/*
 * CMT PG ops
 * Callbacks invoked by the PG subsystem for the CMT class
 */
struct pg_ops pg_ops_cmt = {
	pg_cmt_alloc,		/* alloc */
	pg_cmt_free,		/* free */
	pg_cmt_cpu_init,	/* cpu_init */
	pg_cmt_cpu_fini,	/* cpu_fini */
	pg_cmt_cpu_active,	/* cpu_active */
	pg_cmt_cpu_inactive,	/* cpu_inactive */
	pg_cmt_cpupart_in,	/* cpupart_in */
	NULL,			/* cpupart_out */
	pg_cmt_cpupart_move,	/* cpupart_move */
	pg_cmt_cpu_belongs,	/* cpu_belongs */
};
132*fb2f18f8Sesaxe 
133*fb2f18f8Sesaxe /*
134*fb2f18f8Sesaxe  * Initialize the CMT PG class
135*fb2f18f8Sesaxe  */
136*fb2f18f8Sesaxe void
137*fb2f18f8Sesaxe pg_cmt_class_init(void)
138*fb2f18f8Sesaxe {
139*fb2f18f8Sesaxe 	if (cmt_sched_disabled)
140*fb2f18f8Sesaxe 		return;
141*fb2f18f8Sesaxe 
142*fb2f18f8Sesaxe 	pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
143*fb2f18f8Sesaxe }
144*fb2f18f8Sesaxe 
/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
void
pg_cmt_cpu_startup(cpu_t *cp)
{
	/* Credit one running thread to each CMT PG in cp's lineage */
	PG_NRUN_UPDATE(cp, 1);
}
155*fb2f18f8Sesaxe 
156*fb2f18f8Sesaxe /*
157*fb2f18f8Sesaxe  * Adjust the CMT load in the CMT PGs in which the CPU belongs
158*fb2f18f8Sesaxe  * Note that "n" can be positive in the case of increasing
159*fb2f18f8Sesaxe  * load, or negative in the case of decreasing load.
160*fb2f18f8Sesaxe  */
161*fb2f18f8Sesaxe void
162*fb2f18f8Sesaxe pg_cmt_load(cpu_t *cp, int n)
163*fb2f18f8Sesaxe {
164*fb2f18f8Sesaxe 	pg_cmt_t	*pg;
165*fb2f18f8Sesaxe 
166*fb2f18f8Sesaxe 	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
167*fb2f18f8Sesaxe 	while (pg != NULL) {
168*fb2f18f8Sesaxe 		ASSERT(IS_CMT_PG(pg));
169*fb2f18f8Sesaxe 		atomic_add_32(&pg->cmt_nrunning, n);
170*fb2f18f8Sesaxe 		pg = pg->cmt_parent;
171*fb2f18f8Sesaxe 	}
172*fb2f18f8Sesaxe }
173*fb2f18f8Sesaxe 
174*fb2f18f8Sesaxe /*
175*fb2f18f8Sesaxe  * Return non-zero if thread can migrate between "from" and "to"
176*fb2f18f8Sesaxe  * without a performance penalty
177*fb2f18f8Sesaxe  */
178*fb2f18f8Sesaxe int
179*fb2f18f8Sesaxe pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
180*fb2f18f8Sesaxe {
181*fb2f18f8Sesaxe 	if (from->cpu_physid->cpu_cacheid ==
182*fb2f18f8Sesaxe 	    to->cpu_physid->cpu_cacheid)
183*fb2f18f8Sesaxe 		return (1);
184*fb2f18f8Sesaxe 	return (0);
185*fb2f18f8Sesaxe }
186*fb2f18f8Sesaxe 
187*fb2f18f8Sesaxe /*
188*fb2f18f8Sesaxe  * CMT class specific PG allocation
189*fb2f18f8Sesaxe  */
190*fb2f18f8Sesaxe static pg_t *
191*fb2f18f8Sesaxe pg_cmt_alloc(void)
192*fb2f18f8Sesaxe {
193*fb2f18f8Sesaxe 	return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
194*fb2f18f8Sesaxe }
195*fb2f18f8Sesaxe 
196*fb2f18f8Sesaxe /*
197*fb2f18f8Sesaxe  * Class specific PG de-allocation
198*fb2f18f8Sesaxe  */
199*fb2f18f8Sesaxe static void
200*fb2f18f8Sesaxe pg_cmt_free(pg_t *pg)
201*fb2f18f8Sesaxe {
202*fb2f18f8Sesaxe 	ASSERT(pg != NULL);
203*fb2f18f8Sesaxe 	ASSERT(IS_CMT_PG(pg));
204*fb2f18f8Sesaxe 
205*fb2f18f8Sesaxe 	kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
206*fb2f18f8Sesaxe }
207*fb2f18f8Sesaxe 
208*fb2f18f8Sesaxe /*
209*fb2f18f8Sesaxe  * Return 1 if CMT load balancing policies should be
210*fb2f18f8Sesaxe  * implemented across instances of the specified hardware
211*fb2f18f8Sesaxe  * sharing relationship.
212*fb2f18f8Sesaxe  */
213*fb2f18f8Sesaxe static int
214*fb2f18f8Sesaxe pg_cmt_load_bal_hw(pghw_type_t hw)
215*fb2f18f8Sesaxe {
216*fb2f18f8Sesaxe 	if (hw == PGHW_IPIPE ||
217*fb2f18f8Sesaxe 	    hw == PGHW_FPU ||
218*fb2f18f8Sesaxe 	    hw == PGHW_CHIP)
219*fb2f18f8Sesaxe 		return (1);
220*fb2f18f8Sesaxe 	else
221*fb2f18f8Sesaxe 		return (0);
222*fb2f18f8Sesaxe }
223*fb2f18f8Sesaxe 
224*fb2f18f8Sesaxe /*
225*fb2f18f8Sesaxe  * Return 1 if thread affinity polices should be implemented
226*fb2f18f8Sesaxe  * for instances of the specifed hardware sharing relationship.
227*fb2f18f8Sesaxe  */
228*fb2f18f8Sesaxe static int
229*fb2f18f8Sesaxe pg_cmt_affinity_hw(pghw_type_t hw)
230*fb2f18f8Sesaxe {
231*fb2f18f8Sesaxe 	if (hw == PGHW_CACHE)
232*fb2f18f8Sesaxe 		return (1);
233*fb2f18f8Sesaxe 	else
234*fb2f18f8Sesaxe 		return (0);
235*fb2f18f8Sesaxe }
236*fb2f18f8Sesaxe 
237*fb2f18f8Sesaxe /*
238*fb2f18f8Sesaxe  * Return 1 if CMT scheduling policies should be impelmented
239*fb2f18f8Sesaxe  * for the specified hardware sharing relationship.
240*fb2f18f8Sesaxe  */
241*fb2f18f8Sesaxe static int
242*fb2f18f8Sesaxe pg_cmt_hw(pghw_type_t hw)
243*fb2f18f8Sesaxe {
244*fb2f18f8Sesaxe 	return (pg_cmt_load_bal_hw(hw) ||
245*fb2f18f8Sesaxe 	    pg_cmt_affinity_hw(hw));
246*fb2f18f8Sesaxe }
247*fb2f18f8Sesaxe 
/*
 * CMT class callback for a new CPU entering the system
 *
 * Interrogates the platform for the CPU's performance relevant CMT
 * hardware sharing relationships, finds (or creates) the PG for each
 * one, and then threads the subset of PGs participating in load
 * balancing into a packed parent/child/sibling linked lineage rooted
 * in the CPU's lgroup.
 *
 * Caller must hold cpu_lock.
 */
static void
pg_cmt_cpu_init(cpu_t *cp)
{
	pg_cmt_t	*pg;
	group_t		*cmt_pgs;
	int		level, max_level, nlevels;
	pghw_type_t	hw;
	pg_t		*pg_cache = NULL;
	pg_cmt_t	*cpu_cmt_hier[PGHW_NUM_COMPONENTS];
	lgrp_handle_t	lgrp_handle;
	cmt_lgrp_t	*lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * A new CPU is coming into the system.
	 * Interrogate the platform to see if the CPU
	 * has any performance relevant CMT sharing
	 * relationships
	 */
	cmt_pgs = &cp->cpu_pg->cmt_pgs;
	cp->cpu_pg->cmt_lineage = NULL;

	/* cpu_cmt_hier is indexed by hierarchy level; unused slots are NULL */
	bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
	max_level = nlevels = 0;
	for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {

		/*
		 * We're only interested in CMT hw sharing relationships
		 * that are both policy-relevant and actually shared by cp
		 */
		if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0)
			continue;

		/*
		 * Find (or create) the PG associated with
		 * the hw sharing relationship in which cp
		 * belongs.
		 *
		 * Determine if a suitable PG already
		 * exists, or if one needs to be created.
		 */
		pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
		if (pg == NULL) {
			/*
			 * Create a new one.
			 * Initialize the common...
			 */
			pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);

			/* ... physical ... */
			pghw_init((pghw_t *)pg, cp, hw);

			/*
			 * ... and CMT specific portions of the
			 * structure.
			 */
			bitset_init(&pg->cmt_cpus_actv_set);
			group_create(&pg->cmt_cpus_actv);
		} else {
			ASSERT(IS_CMT_PG(pg));
		}

		/* Add the CPU to the PG */
		pg_cpu_add((pg_t *)pg, cp);

		/*
		 * Ensure capacity of the active CPUs group/bitset
		 */
		group_expand(&pg->cmt_cpus_actv,
		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

		/* The bitset is indexed by cpu_seqid; grow it if needed */
		if (cp->cpu_seqid >=
		    bitset_capacity(&pg->cmt_cpus_actv_set)) {
			bitset_resize(&pg->cmt_cpus_actv_set,
			    cp->cpu_seqid + 1);
		}

		/*
		 * Build a lineage of CMT PGs for load balancing
		 * (only relationships over which balancing is performed)
		 */
		if (pg_cmt_load_bal_hw(hw)) {
			level = pghw_level(hw);
			cpu_cmt_hier[level] = pg;
			if (level > max_level)
				max_level = level;
			nlevels++;
		}

		/* Cache this for later */
		if (hw == PGHW_CACHE)
			pg_cache = (pg_t *)pg;
	}

	/*
	 * Pack out any gaps in the constructed lineage.
	 * Gaps may exist where the architecture knows
	 * about a hardware sharing relationship, but such a
	 * relationship either isn't relevant for load
	 * balancing or doesn't exist between CPUs on the system.
	 */
	pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1);

	/*
	 * For each of the PGs in the CPU's lineage:
	 *	- Add an entry in the CPU sorted CMT PG group
	 *	  which is used for top down CMT load balancing
	 *	- Tie the PG into the CMT hierarchy by connecting
	 *	  it to it's parent and siblings.
	 */
	group_expand(cmt_pgs, nlevels);

	/*
	 * Find the lgrp that encapsulates this CPU's CMT hierarchy
	 */
	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
	lgrp = pg_cmt_find_lgrp(lgrp_handle);

	for (level = 0; level < nlevels; level++) {
		uint_t		children;
		int		err;

		/*
		 * cmt_pgs is kept ordered widest (top) to narrowest
		 * (bottom), hence the reversed insertion index.
		 */
		pg = cpu_cmt_hier[level];
		err = group_add_at(cmt_pgs, pg, nlevels - level - 1);
		ASSERT(err == 0);

		/* Level 0 (narrowest sharing) is the lineage root for cp */
		if (level == 0)
			cp->cpu_pg->cmt_lineage = (pg_t *)pg;

		if (pg->cmt_siblings != NULL) {
			/* Already initialized */
			ASSERT(pg->cmt_parent == NULL ||
			    pg->cmt_parent == cpu_cmt_hier[level + 1]);
			ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
			    pg->cmt_siblings == pg->cmt_parent->cmt_children);
			continue;
		}

		if ((level + 1) == nlevels) {
			/* Topmost PG: siblings are the lgroup's top level PGs */
			pg->cmt_parent = NULL;
			pg->cmt_siblings = &lgrp->cl_pgs;
			children = ++lgrp->cl_npgs;
		} else {
			pg->cmt_parent = cpu_cmt_hier[level + 1];

			/*
			 * A good parent keeps track of their children.
			 * The parent's children group is also the PG's
			 * siblings.
			 */
			if (pg->cmt_parent->cmt_children == NULL) {
				pg->cmt_parent->cmt_children =
				    kmem_zalloc(sizeof (group_t), KM_SLEEP);
				group_create(pg->cmt_parent->cmt_children);
			}
			pg->cmt_siblings = pg->cmt_parent->cmt_children;
			children = ++pg->cmt_parent->cmt_nchildren;
		}
		pg->cmt_hint = 0;
		group_expand(pg->cmt_siblings, children);
	}

	/*
	 * Cache the chip and core IDs in the cpu_t->cpu_physid structure
	 * for fast lookups later.
	 */
	if (cp->cpu_physid) {
		cp->cpu_physid->cpu_chipid =
		    pg_plat_hw_instance_id(cp, PGHW_CHIP);
		cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);

		/*
		 * If this cpu has a PG representing shared cache, then set
		 * cpu_cacheid to that PG's logical id
		 */
		if (pg_cache)
			cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
	}

	/* CPU0 only initialization */
	if (is_cpu0) {
		pg_cmt_cpu_startup(cp);
		is_cpu0 = 0;
	}

}
436*fb2f18f8Sesaxe 
/*
 * Class callback when a CPU is leaving the system (deletion)
 *
 * Unwinds pg_cmt_cpu_init(): detaches the CPU's PGs from the load
 * balancing lineage, removes the CPU from every CMT PG it belongs to,
 * and destroys any PG that becomes empty as a result.
 */
static void
pg_cmt_cpu_fini(cpu_t *cp)
{
	group_iter_t	i;
	pg_cmt_t	*pg;
	group_t		*pgs, *cmt_pgs;
	lgrp_handle_t	lgrp_handle;
	cmt_lgrp_t	*lgrp;

	pgs = &cp->cpu_pg->pgs;
	cmt_pgs = &cp->cpu_pg->cmt_pgs;

	/*
	 * Find the lgroup that encapsulates this CPU's CMT hierarchy
	 */
	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
	lgrp = pg_cmt_find_lgrp(lgrp_handle);

	/*
	 * First, clean up anything load balancing specific for each of
	 * the CPU's PGs that participated in CMT load balancing
	 */
	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
	while (pg != NULL) {

		/*
		 * Remove the PG from the CPU's load balancing lineage
		 */
		(void) group_remove(cmt_pgs, pg, GRP_RESIZE);

		/*
		 * If it's about to become empty, destroy it's children
		 * group, and remove it's reference from it's siblings.
		 * This is done here (rather than below) to avoid removing
		 * our reference from a PG that we just eliminated.
		 */
		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
			if (pg->cmt_children != NULL)
				group_destroy(pg->cmt_children);
			if (pg->cmt_siblings != NULL) {
				/* Decrement the appropriate sibling count */
				if (pg->cmt_siblings == &lgrp->cl_pgs)
					lgrp->cl_npgs--;
				else
					pg->cmt_parent->cmt_nchildren--;
			}
		}
		pg = pg->cmt_parent;
	}

	/* The load balancing lineage should now be fully dismantled */
	ASSERT(GROUP_SIZE(cmt_pgs) == 0);

	/*
	 * Now that the load balancing lineage updates have happened,
	 * remove the CPU from all it's PGs (destroying any that become
	 * empty).
	 */
	group_iter_init(&i);
	while ((pg = group_iterate(pgs, &i)) != NULL) {
		if (IS_CMT_PG(pg) == 0)
			continue;

		pg_cpu_delete((pg_t *)pg, cp);
		/*
		 * Deleting the CPU from the PG changes the CPU's
		 * PG group over which we are actively iterating
		 * Re-initialize the iteration
		 */
		group_iter_init(&i);

		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {

			/*
			 * The PG has become zero sized, so destroy it.
			 * Tear down CMT state first, then the physical
			 * portion, then the PG itself.
			 */
			group_destroy(&pg->cmt_cpus_actv);
			bitset_fini(&pg->cmt_cpus_actv_set);
			pghw_fini((pghw_t *)pg);

			pg_destroy((pg_t *)pg);
		}
	}
}
522*fb2f18f8Sesaxe 
523*fb2f18f8Sesaxe /*
524*fb2f18f8Sesaxe  * Class callback when a CPU is entering a cpu partition
525*fb2f18f8Sesaxe  */
526*fb2f18f8Sesaxe static void
527*fb2f18f8Sesaxe pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
528*fb2f18f8Sesaxe {
529*fb2f18f8Sesaxe 	group_t		*pgs;
530*fb2f18f8Sesaxe 	pg_t		*pg;
531*fb2f18f8Sesaxe 	group_iter_t	i;
532*fb2f18f8Sesaxe 
533*fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
534*fb2f18f8Sesaxe 
535*fb2f18f8Sesaxe 	pgs = &cp->cpu_pg->pgs;
536*fb2f18f8Sesaxe 
537*fb2f18f8Sesaxe 	/*
538*fb2f18f8Sesaxe 	 * Ensure that the new partition's PG bitset
539*fb2f18f8Sesaxe 	 * is large enough for all CMT PG's to which cp
540*fb2f18f8Sesaxe 	 * belongs
541*fb2f18f8Sesaxe 	 */
542*fb2f18f8Sesaxe 	group_iter_init(&i);
543*fb2f18f8Sesaxe 	while ((pg = group_iterate(pgs, &i)) != NULL) {
544*fb2f18f8Sesaxe 		if (IS_CMT_PG(pg) == 0)
545*fb2f18f8Sesaxe 			continue;
546*fb2f18f8Sesaxe 
547*fb2f18f8Sesaxe 		if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
548*fb2f18f8Sesaxe 			bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
549*fb2f18f8Sesaxe 	}
550*fb2f18f8Sesaxe }
551*fb2f18f8Sesaxe 
552*fb2f18f8Sesaxe /*
553*fb2f18f8Sesaxe  * Class callback when a CPU is actually moving partitions
554*fb2f18f8Sesaxe  */
555*fb2f18f8Sesaxe static void
556*fb2f18f8Sesaxe pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
557*fb2f18f8Sesaxe {
558*fb2f18f8Sesaxe 	cpu_t		*cpp;
559*fb2f18f8Sesaxe 	group_t		*pgs;
560*fb2f18f8Sesaxe 	pg_t		*pg;
561*fb2f18f8Sesaxe 	group_iter_t	pg_iter;
562*fb2f18f8Sesaxe 	pg_cpu_itr_t	cpu_iter;
563*fb2f18f8Sesaxe 	boolean_t	found;
564*fb2f18f8Sesaxe 
565*fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
566*fb2f18f8Sesaxe 
567*fb2f18f8Sesaxe 	pgs = &cp->cpu_pg->pgs;
568*fb2f18f8Sesaxe 	group_iter_init(&pg_iter);
569*fb2f18f8Sesaxe 
570*fb2f18f8Sesaxe 	/*
571*fb2f18f8Sesaxe 	 * Iterate over the CPUs CMT PGs
572*fb2f18f8Sesaxe 	 */
573*fb2f18f8Sesaxe 	while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {
574*fb2f18f8Sesaxe 
575*fb2f18f8Sesaxe 		if (IS_CMT_PG(pg) == 0)
576*fb2f18f8Sesaxe 			continue;
577*fb2f18f8Sesaxe 
578*fb2f18f8Sesaxe 		/*
579*fb2f18f8Sesaxe 		 * Add the PG to the bitset in the new partition.
580*fb2f18f8Sesaxe 		 */
581*fb2f18f8Sesaxe 		bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);
582*fb2f18f8Sesaxe 
583*fb2f18f8Sesaxe 		/*
584*fb2f18f8Sesaxe 		 * Remove the PG from the bitset in the old partition
585*fb2f18f8Sesaxe 		 * if the last of the PG's CPUs have left.
586*fb2f18f8Sesaxe 		 */
587*fb2f18f8Sesaxe 		found = B_FALSE;
588*fb2f18f8Sesaxe 		PG_CPU_ITR_INIT(pg, cpu_iter);
589*fb2f18f8Sesaxe 		while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
590*fb2f18f8Sesaxe 			if (cpp == cp)
591*fb2f18f8Sesaxe 				continue;
592*fb2f18f8Sesaxe 			if (cpp->cpu_part->cp_id == oldpp->cp_id) {
593*fb2f18f8Sesaxe 				found = B_TRUE;
594*fb2f18f8Sesaxe 				break;
595*fb2f18f8Sesaxe 			}
596*fb2f18f8Sesaxe 		}
597*fb2f18f8Sesaxe 		if (!found)
598*fb2f18f8Sesaxe 			bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
599*fb2f18f8Sesaxe 	}
600*fb2f18f8Sesaxe }
601*fb2f18f8Sesaxe 
/*
 * Class callback when a CPU becomes active (online)
 *
 * Adds the CPU to the active CPU group/bitset of each of its CMT PGs,
 * and makes newly-active load balancing PGs visible to their siblings
 * and to the CPU's partition.
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_active(cpu_t *cp)
{
	int		err;
	group_iter_t	i;
	pg_cmt_t	*pg;
	group_t		*pgs;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;
	group_iter_init(&i);

	/*
	 * Iterate over the CPU's PGs
	 */
	while ((pg = group_iterate(pgs, &i)) != NULL) {

		if (IS_CMT_PG(pg) == 0)
			continue;

		/* Capacity was reserved in pg_cmt_cpu_init(), so no resize */
		err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
		ASSERT(err == 0);

		/*
		 * If this is the first active CPU in the PG, and it
		 * represents a hardware sharing relationship over which
		 * CMT load balancing is performed, add it as a candidate
		 * for balancing with it's siblings.
		 */
		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
			err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
			ASSERT(err == 0);
		}

		/*
		 * Notate the CPU in the PGs active CPU bitset.
		 * Also notate the PG as being active in it's associated
		 * partition
		 */
		bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
		bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
	}
}
652*fb2f18f8Sesaxe 
/*
 * Class callback when a CPU goes inactive (offline)
 *
 * Reverses pg_cmt_cpu_active(): removes the CPU from the active CPU
 * group/bitset of each of its CMT PGs, withdraws PGs with no remaining
 * active CPUs from load balancing, and updates the partition's PG
 * bitset accordingly.
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_inactive(cpu_t *cp)
{
	int		err;
	group_t		*pgs;
	pg_cmt_t	*pg;
	cpu_t		*cpp;
	group_iter_t	i;
	pg_cpu_itr_t	cpu_itr;
	boolean_t	found;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;
	group_iter_init(&i);

	while ((pg = group_iterate(pgs, &i)) != NULL) {

		if (IS_CMT_PG(pg) == 0)
			continue;

		/*
		 * Remove the CPU from the CMT PGs active CPU group
		 * bitmap
		 */
		err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
		ASSERT(err == 0);

		bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);

		/*
		 * If there are no more active CPUs in this PG over which
		 * load was balanced, remove it as a balancing candidate.
		 */
		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
			err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
			ASSERT(err == 0);
		}

		/*
		 * Assert the number of active CPUs does not exceed
		 * the total number of CPUs in the PG
		 */
		ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

		/*
		 * Update the PG bitset in the CPU's old partition:
		 * clear the PG's bit if cp was the last of the PG's
		 * CPUs in that partition.
		 */
		found = B_FALSE;
		PG_CPU_ITR_INIT(pg, cpu_itr);
		while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
			if (cpp == cp)
				continue;
			if (cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
				found = B_TRUE;
				break;
			}
		}
		if (!found) {
			bitset_del(&cp->cpu_part->cp_cmt_pgs,
			    ((pg_t *)pg)->pg_id);
		}
	}
}
724*fb2f18f8Sesaxe 
725*fb2f18f8Sesaxe /*
726*fb2f18f8Sesaxe  * Return non-zero if the CPU belongs in the given PG
727*fb2f18f8Sesaxe  */
728*fb2f18f8Sesaxe static int
729*fb2f18f8Sesaxe pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
730*fb2f18f8Sesaxe {
731*fb2f18f8Sesaxe 	cpu_t	*pg_cpu;
732*fb2f18f8Sesaxe 
733*fb2f18f8Sesaxe 	pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);
734*fb2f18f8Sesaxe 
735*fb2f18f8Sesaxe 	ASSERT(pg_cpu != NULL);
736*fb2f18f8Sesaxe 
737*fb2f18f8Sesaxe 	/*
738*fb2f18f8Sesaxe 	 * The CPU belongs if, given the nature of the hardware sharing
739*fb2f18f8Sesaxe 	 * relationship represented by the PG, the CPU has that
740*fb2f18f8Sesaxe 	 * relationship with some other CPU already in the PG
741*fb2f18f8Sesaxe 	 */
742*fb2f18f8Sesaxe 	if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
743*fb2f18f8Sesaxe 		return (1);
744*fb2f18f8Sesaxe 
745*fb2f18f8Sesaxe 	return (0);
746*fb2f18f8Sesaxe }
747*fb2f18f8Sesaxe 
748*fb2f18f8Sesaxe /*
749*fb2f18f8Sesaxe  * Pack the CPUs CMT hierarchy
750*fb2f18f8Sesaxe  * The hierarchy order is preserved
751*fb2f18f8Sesaxe  */
752*fb2f18f8Sesaxe static void
753*fb2f18f8Sesaxe pg_cmt_hier_pack(pg_cmt_t *hier[], int sz)
754*fb2f18f8Sesaxe {
755*fb2f18f8Sesaxe 	int	i, j;
756*fb2f18f8Sesaxe 
757*fb2f18f8Sesaxe 	for (i = 0; i < sz; i++) {
758*fb2f18f8Sesaxe 		if (hier[i] != NULL)
759*fb2f18f8Sesaxe 			continue;
760*fb2f18f8Sesaxe 
761*fb2f18f8Sesaxe 		for (j = i; j < sz; j++) {
762*fb2f18f8Sesaxe 			if (hier[j] != NULL) {
763*fb2f18f8Sesaxe 				hier[i] = hier[j];
764*fb2f18f8Sesaxe 				hier[j] = NULL;
765*fb2f18f8Sesaxe 				break;
766*fb2f18f8Sesaxe 			}
767*fb2f18f8Sesaxe 		}
768*fb2f18f8Sesaxe 		if (j == sz)
769*fb2f18f8Sesaxe 			break;
770*fb2f18f8Sesaxe 	}
771*fb2f18f8Sesaxe }
772*fb2f18f8Sesaxe 
773*fb2f18f8Sesaxe /*
774*fb2f18f8Sesaxe  * Return a cmt_lgrp_t * given an lgroup handle.
775*fb2f18f8Sesaxe  * If the right one doesn't yet exist, create one
776*fb2f18f8Sesaxe  * by growing the cmt_lgrps array
777*fb2f18f8Sesaxe  */
778*fb2f18f8Sesaxe static cmt_lgrp_t *
779*fb2f18f8Sesaxe pg_cmt_find_lgrp(lgrp_handle_t hand)
780*fb2f18f8Sesaxe {
781*fb2f18f8Sesaxe 	cmt_lgrp_t	*lgrp;
782*fb2f18f8Sesaxe 
783*fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
784*fb2f18f8Sesaxe 
785*fb2f18f8Sesaxe 	lgrp = cmt_lgrps;
786*fb2f18f8Sesaxe 	while (lgrp != NULL) {
787*fb2f18f8Sesaxe 		if (lgrp->cl_hand == hand)
788*fb2f18f8Sesaxe 			return (lgrp);
789*fb2f18f8Sesaxe 		lgrp = lgrp->cl_next;
790*fb2f18f8Sesaxe 	}
791*fb2f18f8Sesaxe 
792*fb2f18f8Sesaxe 	/*
793*fb2f18f8Sesaxe 	 * Haven't seen this lgrp yet
794*fb2f18f8Sesaxe 	 */
795*fb2f18f8Sesaxe 	lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);
796*fb2f18f8Sesaxe 
797*fb2f18f8Sesaxe 	lgrp->cl_hand = hand;
798*fb2f18f8Sesaxe 	lgrp->cl_npgs = 0;
799*fb2f18f8Sesaxe 	lgrp->cl_next = cmt_lgrps;
800*fb2f18f8Sesaxe 	cmt_lgrps = lgrp;
801*fb2f18f8Sesaxe 	group_create(&lgrp->cl_pgs);
802*fb2f18f8Sesaxe 
803*fb2f18f8Sesaxe 	return (lgrp);
804*fb2f18f8Sesaxe }
805