xref: /titanic_50/usr/src/uts/common/os/pg.c (revision 0e7515250c8395f368aa45fb9acae7c4f8f8b786)
1fb2f18f8Sesaxe /*
2fb2f18f8Sesaxe  * CDDL HEADER START
3fb2f18f8Sesaxe  *
4fb2f18f8Sesaxe  * The contents of this file are subject to the terms of the
5fb2f18f8Sesaxe  * Common Development and Distribution License (the "License").
6fb2f18f8Sesaxe  * You may not use this file except in compliance with the License.
7fb2f18f8Sesaxe  *
8fb2f18f8Sesaxe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fb2f18f8Sesaxe  * or http://www.opensolaris.org/os/licensing.
10fb2f18f8Sesaxe  * See the License for the specific language governing permissions
11fb2f18f8Sesaxe  * and limitations under the License.
12fb2f18f8Sesaxe  *
13fb2f18f8Sesaxe  * When distributing Covered Code, include this CDDL HEADER in each
14fb2f18f8Sesaxe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fb2f18f8Sesaxe  * If applicable, add the following below this CDDL HEADER, with the
16fb2f18f8Sesaxe  * fields enclosed by brackets "[]" replaced with your own identifying
17fb2f18f8Sesaxe  * information: Portions Copyright [yyyy] [name of copyright owner]
18fb2f18f8Sesaxe  *
19fb2f18f8Sesaxe  * CDDL HEADER END
20fb2f18f8Sesaxe  */
21fb2f18f8Sesaxe /*
22*0e751525SEric Saxe  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23fb2f18f8Sesaxe  * Use is subject to license terms.
24fb2f18f8Sesaxe  */
25fb2f18f8Sesaxe 
26fb2f18f8Sesaxe #include <sys/systm.h>
27fb2f18f8Sesaxe #include <sys/types.h>
28fb2f18f8Sesaxe #include <sys/param.h>
29fb2f18f8Sesaxe #include <sys/thread.h>
30fb2f18f8Sesaxe #include <sys/cpuvar.h>
31fb2f18f8Sesaxe #include <sys/cpupart.h>
32fb2f18f8Sesaxe #include <sys/kmem.h>
33fb2f18f8Sesaxe #include <sys/cmn_err.h>
34fb2f18f8Sesaxe #include <sys/kstat.h>
35fb2f18f8Sesaxe #include <sys/processor.h>
36fb2f18f8Sesaxe #include <sys/disp.h>
37fb2f18f8Sesaxe #include <sys/group.h>
38fb2f18f8Sesaxe #include <sys/pg.h>
39fb2f18f8Sesaxe 
40fb2f18f8Sesaxe /*
41fb2f18f8Sesaxe  * Processor groups
42fb2f18f8Sesaxe  *
43fb2f18f8Sesaxe  * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
44fb2f18f8Sesaxe  * it is no longer necessarily true that a given physical processor module
45fb2f18f8Sesaxe  * will present itself as a single schedulable entity (cpu_t). Rather, each
46fb2f18f8Sesaxe  * chip and/or processor core may present itself as one or more "logical" CPUs.
47fb2f18f8Sesaxe  *
48fb2f18f8Sesaxe  * The logical CPUs presented may share physical components such as caches,
49fb2f18f8Sesaxe  * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
50fb2f18f8Sesaxe  * kernel be aware of the relationships existing between logical CPUs so that
51fb2f18f8Sesaxe  * the appropriate optimizations may be employed.
52fb2f18f8Sesaxe  *
53fb2f18f8Sesaxe  * The processor group abstraction represents a set of logical CPUs that
54fb2f18f8Sesaxe  * generally share some sort of physical or characteristic relationship.
55fb2f18f8Sesaxe  *
56fb2f18f8Sesaxe  * In the case of a physical sharing relationship, the CPUs in the group may
57fb2f18f8Sesaxe  * share a pipeline, cache or floating point unit. In the case of a logical
58fb2f18f8Sesaxe  * relationship, a PG may represent the set of CPUs in a processor set, or the
59fb2f18f8Sesaxe  * set of CPUs running at a particular clock speed.
60fb2f18f8Sesaxe  *
61fb2f18f8Sesaxe  * The generic processor group structure, pg_t, contains the elements generic
62fb2f18f8Sesaxe  * to a group of CPUs. Depending on the nature of the CPU relationship
63fb2f18f8Sesaxe  * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
64fb2f18f8Sesaxe  * PG where more specific data is represented.
65fb2f18f8Sesaxe  *
66fb2f18f8Sesaxe  * As an example, a PG representing a PHYSICAL relationship, may be recast to
67fb2f18f8Sesaxe  * a pghw_t, where data further describing the hardware sharing relationship
68fb2f18f8Sesaxe  * is maintained. See pghw.c and pghw.h for details on physical PGs.
69fb2f18f8Sesaxe  *
70fb2f18f8Sesaxe  * At this time a more specialized casting of a PG representing a LOGICAL
71fb2f18f8Sesaxe  * relationship has not been implemented, but the architecture allows for this
72fb2f18f8Sesaxe  * in the future.
73fb2f18f8Sesaxe  *
74fb2f18f8Sesaxe  * Processor Group Classes
75fb2f18f8Sesaxe  *
76fb2f18f8Sesaxe  * Processor group consumers may wish to maintain and associate specific
77fb2f18f8Sesaxe  * data with the PGs they create. For this reason, a mechanism for creating
78fb2f18f8Sesaxe  * class specific PGs exists. Classes may overload the default functions for
79fb2f18f8Sesaxe  * creating, destroying, and associating CPUs with PGs, and may also register
80fb2f18f8Sesaxe  * class specific callbacks to be invoked when the CPU related system
81fb2f18f8Sesaxe  * configuration changes. Class specific data is stored/associated with
82fb2f18f8Sesaxe  * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
83fb2f18f8Sesaxe  * element of a class specific PG object. In memory, such a structure may look
84fb2f18f8Sesaxe  * like:
85fb2f18f8Sesaxe  *
86fb2f18f8Sesaxe  * ----------------------- - - -
87fb2f18f8Sesaxe  * | common              | | | |  <--(pg_t *)
88fb2f18f8Sesaxe  * ----------------------- | | -
89fb2f18f8Sesaxe  * | HW specific         | | | <-----(pghw_t *)
90fb2f18f8Sesaxe  * ----------------------- | -
91fb2f18f8Sesaxe  * | class specific      | | <-------(pg_cmt_t *)
92fb2f18f8Sesaxe  * ----------------------- -
93fb2f18f8Sesaxe  *
94fb2f18f8Sesaxe  * Access to the PG class specific data can be had by casting a pointer to
95fb2f18f8Sesaxe  * its class specific view.
96fb2f18f8Sesaxe  */
97fb2f18f8Sesaxe 
98fb2f18f8Sesaxe static pg_t		*pg_alloc_default(pg_class_t);
99fb2f18f8Sesaxe static void		pg_free_default(pg_t *);
100*0e751525SEric Saxe static void		pg_null_op();
101fb2f18f8Sesaxe 
102fb2f18f8Sesaxe /*
103fb2f18f8Sesaxe  * Bootstrap CPU specific PG data
104fb2f18f8Sesaxe  * See pg_cpu_bootstrap()
105fb2f18f8Sesaxe  */
106fb2f18f8Sesaxe static cpu_pg_t		bootstrap_pg_data;
107fb2f18f8Sesaxe 
108fb2f18f8Sesaxe /*
109fb2f18f8Sesaxe  * Bitset of allocated PG ids (they are sequential)
110fb2f18f8Sesaxe  * and the next free id in the set.
111fb2f18f8Sesaxe  */
112fb2f18f8Sesaxe static bitset_t		pg_id_set;
113fb2f18f8Sesaxe static pgid_t		pg_id_next = 0;
114fb2f18f8Sesaxe 
115fb2f18f8Sesaxe /*
116fb2f18f8Sesaxe  * Default and externed PG ops vectors
117fb2f18f8Sesaxe  */
118fb2f18f8Sesaxe static struct pg_ops pg_ops_default = {
119fb2f18f8Sesaxe 	pg_alloc_default,	/* alloc */
120fb2f18f8Sesaxe 	pg_free_default,	/* free */
121fb2f18f8Sesaxe 	NULL,			/* cpu_init */
122fb2f18f8Sesaxe 	NULL,			/* cpu_fini */
123fb2f18f8Sesaxe 	NULL,			/* cpu_active */
124fb2f18f8Sesaxe 	NULL,			/* cpu_inactive */
125fb2f18f8Sesaxe 	NULL,			/* cpupart_in */
126fb2f18f8Sesaxe 	NULL,			/* cpupart_out */
127fb2f18f8Sesaxe 	NULL,			/* cpupart_move */
128fb2f18f8Sesaxe 	NULL,			/* cpu_belongs */
129*0e751525SEric Saxe 	NULL,			/* policy_name */
130*0e751525SEric Saxe };
131*0e751525SEric Saxe 
132*0e751525SEric Saxe static struct pg_cb_ops pg_cb_ops_default = {
133*0e751525SEric Saxe 	pg_null_op,		/* thread_swtch */
134*0e751525SEric Saxe 	pg_null_op,		/* thread_remain */
135fb2f18f8Sesaxe };
136fb2f18f8Sesaxe 
/*
 * Class specific PG allocation callbacks
 *
 * PG_ALLOC(class) invokes the alloc op of the class with id "class", and
 * PG_FREE(pg) invokes the free op of pg's class. Either falls back to the
 * default class's op when the class leaves its own entry NULL.
 *
 * NOTE(review): alloc is invoked without arguments although
 * pg_alloc_default() is declared as taking a pg_class_t; that parameter
 * is unused there.
 */
#define	PG_ALLOC(class)							\
	(pg_classes[(class)].pgc_ops->alloc ?				\
	    pg_classes[(class)].pgc_ops->alloc() :			\
	    pg_classes[pg_default_cid].pgc_ops->alloc())

#define	PG_FREE(pg)							\
	((pg)->pg_class->pgc_ops->free ?				\
	    (pg)->pg_class->pgc_ops->free(pg) :				\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))
149fb2f18f8Sesaxe 
150fb2f18f8Sesaxe 
/*
 * Class specific PG policy name
 *
 * Evaluates to the result of the class's policy_name op for "pg", or to
 * NULL when the class does not provide one.
 */
#define	PG_POLICY_NAME(pg)						\
	((pg)->pg_class->pgc_ops->policy_name ?				\
	    (pg)->pg_class->pgc_ops->policy_name(pg) : NULL)
157*0e751525SEric Saxe 
/*
 * Class specific membership test callback
 *
 * Evaluates to the result of the class's cpu_belongs op for (pg, cp), or
 * to 0 when the class does not provide one.
 */
#define	PG_CPU_BELONGS(pg, cp)						\
	((pg)->pg_class->pgc_ops->cpu_belongs ?				\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
164fb2f18f8Sesaxe 
/*
 * CPU configuration callbacks
 *
 * Each macro invokes the named op of the class with id "class" on "cp",
 * doing nothing when the class leaves that op NULL. The bodies are wrapped
 * in do { } while (0) so that each use behaves as a single statement, even
 * as the unbraced body of an if/else.
 */
#define	PG_CPU_INIT(class, cp)						\
do {									\
	if (pg_classes[(class)].pgc_ops->cpu_init)			\
		pg_classes[(class)].pgc_ops->cpu_init(cp);		\
} while (0)

#define	PG_CPU_FINI(class, cp)						\
do {									\
	if (pg_classes[(class)].pgc_ops->cpu_fini)			\
		pg_classes[(class)].pgc_ops->cpu_fini(cp);		\
} while (0)

#define	PG_CPU_ACTIVE(class, cp)					\
do {									\
	if (pg_classes[(class)].pgc_ops->cpu_active)			\
		pg_classes[(class)].pgc_ops->cpu_active(cp);		\
} while (0)

#define	PG_CPU_INACTIVE(class, cp)					\
do {									\
	if (pg_classes[(class)].pgc_ops->cpu_inactive)			\
		pg_classes[(class)].pgc_ops->cpu_inactive(cp);		\
} while (0)
191fb2f18f8Sesaxe 
/*
 * CPU / cpupart configuration callbacks
 *
 * Each macro invokes the named op of the class with id "class", doing
 * nothing when the class leaves that op NULL. The bodies are wrapped in
 * do { } while (0) so that each use behaves as a single statement.
 */
#define	PG_CPUPART_IN(class, cp, pp)					\
do {									\
	if (pg_classes[(class)].pgc_ops->cpupart_in)			\
		pg_classes[(class)].pgc_ops->cpupart_in(cp, pp);	\
} while (0)

#define	PG_CPUPART_OUT(class, cp, pp)					\
do {									\
	if (pg_classes[(class)].pgc_ops->cpupart_out)			\
		pg_classes[(class)].pgc_ops->cpupart_out(cp, pp);	\
} while (0)

#define	PG_CPUPART_MOVE(class, cp, old, new)				\
do {									\
	if (pg_classes[(class)].pgc_ops->cpupart_move)			\
		pg_classes[(class)].pgc_ops->cpupart_move(cp, old, new); \
} while (0)
212fb2f18f8Sesaxe 
213fb2f18f8Sesaxe 
214fb2f18f8Sesaxe 
215fb2f18f8Sesaxe static pg_class_t	*pg_classes;
216fb2f18f8Sesaxe static int		pg_nclasses;
217fb2f18f8Sesaxe 
218fb2f18f8Sesaxe static pg_cid_t		pg_default_cid;
219fb2f18f8Sesaxe 
220fb2f18f8Sesaxe /*
221*0e751525SEric Saxe  * Initialze common PG subsystem.
222fb2f18f8Sesaxe  */
223fb2f18f8Sesaxe void
224fb2f18f8Sesaxe pg_init(void)
225fb2f18f8Sesaxe {
226*0e751525SEric Saxe 	extern void pg_cmt_class_init();
227*0e751525SEric Saxe 
228fb2f18f8Sesaxe 	pg_default_cid =
229fb2f18f8Sesaxe 	    pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
230*0e751525SEric Saxe 
231*0e751525SEric Saxe 	/*
232*0e751525SEric Saxe 	 * Initialize classes to allow them to register with the framework
233*0e751525SEric Saxe 	 */
234*0e751525SEric Saxe 	pg_cmt_class_init();
235*0e751525SEric Saxe 
236*0e751525SEric Saxe 	pg_cpu0_init();
237fb2f18f8Sesaxe }
238fb2f18f8Sesaxe 
239fb2f18f8Sesaxe /*
240fb2f18f8Sesaxe  * Perform CPU 0 initialization
241fb2f18f8Sesaxe  */
242fb2f18f8Sesaxe void
243fb2f18f8Sesaxe pg_cpu0_init(void)
244fb2f18f8Sesaxe {
245fb2f18f8Sesaxe 	extern void pghw_physid_create();
246fb2f18f8Sesaxe 
247fb2f18f8Sesaxe 	/*
248fb2f18f8Sesaxe 	 * Create the physical ID cache for the boot CPU
249fb2f18f8Sesaxe 	 */
250fb2f18f8Sesaxe 	pghw_physid_create(CPU);
251fb2f18f8Sesaxe 
252fb2f18f8Sesaxe 	/*
253fb2f18f8Sesaxe 	 * pg_cpu_* require that cpu_lock be held
254fb2f18f8Sesaxe 	 */
255fb2f18f8Sesaxe 	mutex_enter(&cpu_lock);
256fb2f18f8Sesaxe 
257fb2f18f8Sesaxe 	pg_cpu_init(CPU);
258fb2f18f8Sesaxe 	pg_cpupart_in(CPU, &cp_default);
259fb2f18f8Sesaxe 	pg_cpu_active(CPU);
260fb2f18f8Sesaxe 
261fb2f18f8Sesaxe 	mutex_exit(&cpu_lock);
262fb2f18f8Sesaxe }
263fb2f18f8Sesaxe 
264fb2f18f8Sesaxe /*
265a6604450Sesaxe  * Invoked when topology for CPU0 changes
266a6604450Sesaxe  * post pg_cpu0_init().
267a6604450Sesaxe  *
268a6604450Sesaxe  * Currently happens as a result of null_proc_lpa
269a6604450Sesaxe  * on Starcat.
270a6604450Sesaxe  */
271a6604450Sesaxe void
272a6604450Sesaxe pg_cpu0_reinit(void)
273a6604450Sesaxe {
274a6604450Sesaxe 	mutex_enter(&cpu_lock);
275a6604450Sesaxe 	pg_cpu_inactive(CPU);
276a6604450Sesaxe 	pg_cpupart_out(CPU, &cp_default);
277a6604450Sesaxe 	pg_cpu_fini(CPU);
278a6604450Sesaxe 
279a6604450Sesaxe 	pg_cpu_init(CPU);
280a6604450Sesaxe 	pg_cpupart_in(CPU, &cp_default);
281a6604450Sesaxe 	pg_cpu_active(CPU);
282a6604450Sesaxe 	mutex_exit(&cpu_lock);
283a6604450Sesaxe }
284a6604450Sesaxe 
285a6604450Sesaxe /*
286fb2f18f8Sesaxe  * Register a new PG class
287fb2f18f8Sesaxe  */
288fb2f18f8Sesaxe pg_cid_t
289fb2f18f8Sesaxe pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
290fb2f18f8Sesaxe {
291fb2f18f8Sesaxe 	pg_class_t	*newclass;
292fb2f18f8Sesaxe 	pg_class_t	*classes_old;
293fb2f18f8Sesaxe 	id_t		cid;
294fb2f18f8Sesaxe 
295fb2f18f8Sesaxe 	mutex_enter(&cpu_lock);
296fb2f18f8Sesaxe 
297fb2f18f8Sesaxe 	/*
298fb2f18f8Sesaxe 	 * Allocate a new pg_class_t in the pg_classes array
299fb2f18f8Sesaxe 	 */
300fb2f18f8Sesaxe 	if (pg_nclasses == 0) {
301fb2f18f8Sesaxe 		pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
302fb2f18f8Sesaxe 	} else {
303fb2f18f8Sesaxe 		classes_old = pg_classes;
304fb2f18f8Sesaxe 		pg_classes =
305fb2f18f8Sesaxe 		    kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
306fb2f18f8Sesaxe 		    KM_SLEEP);
307fb2f18f8Sesaxe 		(void) kcopy(classes_old, pg_classes,
308fb2f18f8Sesaxe 		    sizeof (pg_class_t) * pg_nclasses);
309fb2f18f8Sesaxe 		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
310fb2f18f8Sesaxe 	}
311fb2f18f8Sesaxe 
312fb2f18f8Sesaxe 	cid = pg_nclasses++;
313fb2f18f8Sesaxe 	newclass = &pg_classes[cid];
314fb2f18f8Sesaxe 
315fb2f18f8Sesaxe 	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
316fb2f18f8Sesaxe 	newclass->pgc_id = cid;
317fb2f18f8Sesaxe 	newclass->pgc_ops = ops;
318fb2f18f8Sesaxe 	newclass->pgc_relation = relation;
319fb2f18f8Sesaxe 
320fb2f18f8Sesaxe 	mutex_exit(&cpu_lock);
321fb2f18f8Sesaxe 
322fb2f18f8Sesaxe 	return (cid);
323fb2f18f8Sesaxe }
324fb2f18f8Sesaxe 
325fb2f18f8Sesaxe /*
326fb2f18f8Sesaxe  * Try to find an existing pg in set in which to place cp.
327fb2f18f8Sesaxe  * Returns the pg if found, and NULL otherwise.
328fb2f18f8Sesaxe  * In the event that the CPU could belong to multiple
329fb2f18f8Sesaxe  * PGs in the set, the first matching PG will be returned.
330fb2f18f8Sesaxe  */
331fb2f18f8Sesaxe pg_t *
332fb2f18f8Sesaxe pg_cpu_find_pg(cpu_t *cp, group_t *set)
333fb2f18f8Sesaxe {
334fb2f18f8Sesaxe 	pg_t		*pg;
335fb2f18f8Sesaxe 	group_iter_t	i;
336fb2f18f8Sesaxe 
337fb2f18f8Sesaxe 	group_iter_init(&i);
338fb2f18f8Sesaxe 	while ((pg = group_iterate(set, &i)) != NULL) {
339fb2f18f8Sesaxe 		/*
340fb2f18f8Sesaxe 		 * Ask the class if the CPU belongs here
341fb2f18f8Sesaxe 		 */
342fb2f18f8Sesaxe 		if (PG_CPU_BELONGS(pg, cp))
343fb2f18f8Sesaxe 			return (pg);
344fb2f18f8Sesaxe 	}
345fb2f18f8Sesaxe 	return (NULL);
346fb2f18f8Sesaxe }
347fb2f18f8Sesaxe 
348fb2f18f8Sesaxe /*
349fb2f18f8Sesaxe  * Iterate over the CPUs in a PG after initializing
350fb2f18f8Sesaxe  * the iterator with PG_CPU_ITR_INIT()
351fb2f18f8Sesaxe  */
352fb2f18f8Sesaxe cpu_t *
353fb2f18f8Sesaxe pg_cpu_next(pg_cpu_itr_t *itr)
354fb2f18f8Sesaxe {
355fb2f18f8Sesaxe 	cpu_t		*cpu;
356fb2f18f8Sesaxe 	pg_t		*pg = itr->pg;
357fb2f18f8Sesaxe 
358fb2f18f8Sesaxe 	cpu = group_iterate(&pg->pg_cpus, &itr->position);
359fb2f18f8Sesaxe 	return (cpu);
360fb2f18f8Sesaxe }
361fb2f18f8Sesaxe 
362fb2f18f8Sesaxe /*
363*0e751525SEric Saxe  * Test if a given PG contains a given CPU
364*0e751525SEric Saxe  */
365*0e751525SEric Saxe boolean_t
366*0e751525SEric Saxe pg_cpu_find(pg_t *pg, cpu_t *cp)
367*0e751525SEric Saxe {
368*0e751525SEric Saxe 	if (group_find(&pg->pg_cpus, cp) == (uint_t)-1)
369*0e751525SEric Saxe 		return (B_FALSE);
370*0e751525SEric Saxe 
371*0e751525SEric Saxe 	return (B_TRUE);
372*0e751525SEric Saxe }
373*0e751525SEric Saxe 
374*0e751525SEric Saxe /*
375*0e751525SEric Saxe  * Set the PGs callbacks to the default
376*0e751525SEric Saxe  */
377*0e751525SEric Saxe void
378*0e751525SEric Saxe pg_callback_set_defaults(pg_t *pg)
379*0e751525SEric Saxe {
380*0e751525SEric Saxe 	bcopy(&pg_cb_ops_default, &pg->pg_cb, sizeof (struct pg_cb_ops));
381*0e751525SEric Saxe }
382*0e751525SEric Saxe 
383*0e751525SEric Saxe /*
384fb2f18f8Sesaxe  * Create a PG of a given class.
385fb2f18f8Sesaxe  * This routine may block.
386fb2f18f8Sesaxe  */
387fb2f18f8Sesaxe pg_t *
388fb2f18f8Sesaxe pg_create(pg_cid_t cid)
389fb2f18f8Sesaxe {
390fb2f18f8Sesaxe 	pg_t	*pg;
391fb2f18f8Sesaxe 	pgid_t	id;
392fb2f18f8Sesaxe 
393fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
394fb2f18f8Sesaxe 
395fb2f18f8Sesaxe 	/*
396fb2f18f8Sesaxe 	 * Call the class specific PG allocation routine
397fb2f18f8Sesaxe 	 */
398fb2f18f8Sesaxe 	pg = PG_ALLOC(cid);
399fb2f18f8Sesaxe 	pg->pg_class = &pg_classes[cid];
400fb2f18f8Sesaxe 	pg->pg_relation = pg->pg_class->pgc_relation;
401fb2f18f8Sesaxe 
402fb2f18f8Sesaxe 	/*
403fb2f18f8Sesaxe 	 * Find the next free sequential pg id
404fb2f18f8Sesaxe 	 */
405fb2f18f8Sesaxe 	do {
406fb2f18f8Sesaxe 		if (pg_id_next >= bitset_capacity(&pg_id_set))
407fb2f18f8Sesaxe 			bitset_resize(&pg_id_set, pg_id_next + 1);
408fb2f18f8Sesaxe 		id = pg_id_next++;
409fb2f18f8Sesaxe 	} while (bitset_in_set(&pg_id_set, id));
410fb2f18f8Sesaxe 
411fb2f18f8Sesaxe 	pg->pg_id = id;
412fb2f18f8Sesaxe 	bitset_add(&pg_id_set, pg->pg_id);
413fb2f18f8Sesaxe 
414fb2f18f8Sesaxe 	/*
415fb2f18f8Sesaxe 	 * Create the PG's CPU group
416fb2f18f8Sesaxe 	 */
417fb2f18f8Sesaxe 	group_create(&pg->pg_cpus);
418fb2f18f8Sesaxe 
419*0e751525SEric Saxe 	/*
420*0e751525SEric Saxe 	 * Initialize the events ops vector
421*0e751525SEric Saxe 	 */
422*0e751525SEric Saxe 	pg_callback_set_defaults(pg);
423*0e751525SEric Saxe 
424fb2f18f8Sesaxe 	return (pg);
425fb2f18f8Sesaxe }
426fb2f18f8Sesaxe 
427fb2f18f8Sesaxe /*
428fb2f18f8Sesaxe  * Destroy a PG.
429fb2f18f8Sesaxe  * This routine may block.
430fb2f18f8Sesaxe  */
431fb2f18f8Sesaxe void
432fb2f18f8Sesaxe pg_destroy(pg_t *pg)
433fb2f18f8Sesaxe {
434fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
435fb2f18f8Sesaxe 
436fb2f18f8Sesaxe 	group_destroy(&pg->pg_cpus);
437fb2f18f8Sesaxe 
438fb2f18f8Sesaxe 	/*
439fb2f18f8Sesaxe 	 * Unassign the pg_id
440fb2f18f8Sesaxe 	 */
441fb2f18f8Sesaxe 	if (pg_id_next > pg->pg_id)
442fb2f18f8Sesaxe 		pg_id_next = pg->pg_id;
443fb2f18f8Sesaxe 	bitset_del(&pg_id_set, pg->pg_id);
444fb2f18f8Sesaxe 
445fb2f18f8Sesaxe 	/*
446fb2f18f8Sesaxe 	 * Invoke the class specific de-allocation routine
447fb2f18f8Sesaxe 	 */
448fb2f18f8Sesaxe 	PG_FREE(pg);
449fb2f18f8Sesaxe }
450fb2f18f8Sesaxe 
451fb2f18f8Sesaxe /*
452fb2f18f8Sesaxe  * Add the CPU "cp" to processor group "pg"
453fb2f18f8Sesaxe  * This routine may block.
454fb2f18f8Sesaxe  */
455fb2f18f8Sesaxe void
456fb2f18f8Sesaxe pg_cpu_add(pg_t *pg, cpu_t *cp)
457fb2f18f8Sesaxe {
458fb2f18f8Sesaxe 	int	err;
459fb2f18f8Sesaxe 
460fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
461fb2f18f8Sesaxe 
462fb2f18f8Sesaxe 	/* This adds the CPU to the PG's CPU group */
463fb2f18f8Sesaxe 	err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
464fb2f18f8Sesaxe 	ASSERT(err == 0);
465fb2f18f8Sesaxe 
466fb2f18f8Sesaxe 	/* This adds the PG to the CPUs PG group */
467fb2f18f8Sesaxe 	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
468fb2f18f8Sesaxe 	err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
469fb2f18f8Sesaxe 	ASSERT(err == 0);
470fb2f18f8Sesaxe }
471fb2f18f8Sesaxe 
472fb2f18f8Sesaxe /*
473fb2f18f8Sesaxe  * Remove "cp" from "pg".
474fb2f18f8Sesaxe  * This routine may block.
475fb2f18f8Sesaxe  */
476fb2f18f8Sesaxe void
477fb2f18f8Sesaxe pg_cpu_delete(pg_t *pg, cpu_t *cp)
478fb2f18f8Sesaxe {
479fb2f18f8Sesaxe 	int	err;
480fb2f18f8Sesaxe 
481fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
482fb2f18f8Sesaxe 
483fb2f18f8Sesaxe 	/* Remove the CPU from the PG */
484fb2f18f8Sesaxe 	err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
485fb2f18f8Sesaxe 	ASSERT(err == 0);
486fb2f18f8Sesaxe 
487fb2f18f8Sesaxe 	/* Remove the PG from the CPU's PG group */
488fb2f18f8Sesaxe 	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
489fb2f18f8Sesaxe 	err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
490fb2f18f8Sesaxe 	ASSERT(err == 0);
491fb2f18f8Sesaxe }
492fb2f18f8Sesaxe 
493fb2f18f8Sesaxe /*
494fb2f18f8Sesaxe  * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
495fb2f18f8Sesaxe  */
496fb2f18f8Sesaxe static cpu_pg_t *
497fb2f18f8Sesaxe pg_cpu_data_alloc(void)
498fb2f18f8Sesaxe {
499fb2f18f8Sesaxe 	cpu_pg_t	*pgd;
500fb2f18f8Sesaxe 
501fb2f18f8Sesaxe 	pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
502fb2f18f8Sesaxe 	group_create(&pgd->pgs);
503fb2f18f8Sesaxe 	group_create(&pgd->cmt_pgs);
504fb2f18f8Sesaxe 
505fb2f18f8Sesaxe 	return (pgd);
506fb2f18f8Sesaxe }
507fb2f18f8Sesaxe 
508fb2f18f8Sesaxe /*
509fb2f18f8Sesaxe  * Free the CPU's PG data.
510fb2f18f8Sesaxe  */
511fb2f18f8Sesaxe static void
512fb2f18f8Sesaxe pg_cpu_data_free(cpu_pg_t *pgd)
513fb2f18f8Sesaxe {
514fb2f18f8Sesaxe 	group_destroy(&pgd->pgs);
515fb2f18f8Sesaxe 	group_destroy(&pgd->cmt_pgs);
516fb2f18f8Sesaxe 	kmem_free(pgd, sizeof (cpu_pg_t));
517fb2f18f8Sesaxe }
518fb2f18f8Sesaxe 
519fb2f18f8Sesaxe /*
520fb2f18f8Sesaxe  * A new CPU is coming into the system, either via booting or DR.
521fb2f18f8Sesaxe  * Allocate it's PG data, and notify all registered classes about
522fb2f18f8Sesaxe  * the new CPU.
523fb2f18f8Sesaxe  *
524fb2f18f8Sesaxe  * This routine may block.
525fb2f18f8Sesaxe  */
526fb2f18f8Sesaxe void
527fb2f18f8Sesaxe pg_cpu_init(cpu_t *cp)
528fb2f18f8Sesaxe {
529fb2f18f8Sesaxe 	pg_cid_t	i;
530fb2f18f8Sesaxe 
531fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
532fb2f18f8Sesaxe 
533fb2f18f8Sesaxe 	/*
534fb2f18f8Sesaxe 	 * Allocate and size the per CPU pg data
535fb2f18f8Sesaxe 	 */
536fb2f18f8Sesaxe 	cp->cpu_pg = pg_cpu_data_alloc();
537fb2f18f8Sesaxe 
538fb2f18f8Sesaxe 	/*
539fb2f18f8Sesaxe 	 * Notify all registered classes about the new CPU
540fb2f18f8Sesaxe 	 */
541fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
542fb2f18f8Sesaxe 		PG_CPU_INIT(i, cp);
543fb2f18f8Sesaxe }
544fb2f18f8Sesaxe 
545fb2f18f8Sesaxe /*
546fb2f18f8Sesaxe  * This CPU is being deleted from the system. Notify the classes
547fb2f18f8Sesaxe  * and free up the CPU's PG data.
548fb2f18f8Sesaxe  */
549fb2f18f8Sesaxe void
550fb2f18f8Sesaxe pg_cpu_fini(cpu_t *cp)
551fb2f18f8Sesaxe {
552fb2f18f8Sesaxe 	pg_cid_t	i;
553fb2f18f8Sesaxe 
554fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
555fb2f18f8Sesaxe 
556fb2f18f8Sesaxe 	/*
557fb2f18f8Sesaxe 	 * This can happen if the CPU coming into the system
558fb2f18f8Sesaxe 	 * failed to power on.
559fb2f18f8Sesaxe 	 */
560fb2f18f8Sesaxe 	if (cp->cpu_pg == NULL ||
561fb2f18f8Sesaxe 	    cp->cpu_pg == &bootstrap_pg_data)
562fb2f18f8Sesaxe 		return;
563fb2f18f8Sesaxe 
564fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
565fb2f18f8Sesaxe 		PG_CPU_FINI(i, cp);
566fb2f18f8Sesaxe 
567fb2f18f8Sesaxe 	pg_cpu_data_free(cp->cpu_pg);
568fb2f18f8Sesaxe 	cp->cpu_pg = NULL;
569fb2f18f8Sesaxe }
570fb2f18f8Sesaxe 
571fb2f18f8Sesaxe /*
572fb2f18f8Sesaxe  * This CPU is becoming active (online)
573fb2f18f8Sesaxe  * This routine may not block as it is called from paused CPUs
574fb2f18f8Sesaxe  * context.
575fb2f18f8Sesaxe  */
576fb2f18f8Sesaxe void
577fb2f18f8Sesaxe pg_cpu_active(cpu_t *cp)
578fb2f18f8Sesaxe {
579fb2f18f8Sesaxe 	pg_cid_t	i;
580fb2f18f8Sesaxe 
581fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
582fb2f18f8Sesaxe 
583fb2f18f8Sesaxe 	/*
584fb2f18f8Sesaxe 	 * Notify all registered classes about the new CPU
585fb2f18f8Sesaxe 	 */
586fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
587fb2f18f8Sesaxe 		PG_CPU_ACTIVE(i, cp);
588fb2f18f8Sesaxe }
589fb2f18f8Sesaxe 
590fb2f18f8Sesaxe /*
591fb2f18f8Sesaxe  * This CPU is going inactive (offline)
592fb2f18f8Sesaxe  * This routine may not block, as it is called from paused
593fb2f18f8Sesaxe  * CPUs context.
594fb2f18f8Sesaxe  */
595fb2f18f8Sesaxe void
596fb2f18f8Sesaxe pg_cpu_inactive(cpu_t *cp)
597fb2f18f8Sesaxe {
598fb2f18f8Sesaxe 	pg_cid_t	i;
599fb2f18f8Sesaxe 
600fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
601fb2f18f8Sesaxe 
602fb2f18f8Sesaxe 	/*
603fb2f18f8Sesaxe 	 * Notify all registered classes about the new CPU
604fb2f18f8Sesaxe 	 */
605fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
606fb2f18f8Sesaxe 		PG_CPU_INACTIVE(i, cp);
607fb2f18f8Sesaxe }
608fb2f18f8Sesaxe 
609fb2f18f8Sesaxe /*
610fb2f18f8Sesaxe  * Invoked when the CPU is about to move into the partition
611fb2f18f8Sesaxe  * This routine may block.
612fb2f18f8Sesaxe  */
613fb2f18f8Sesaxe void
614fb2f18f8Sesaxe pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
615fb2f18f8Sesaxe {
616fb2f18f8Sesaxe 	int	i;
617fb2f18f8Sesaxe 
618fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
619fb2f18f8Sesaxe 
620fb2f18f8Sesaxe 	/*
621fb2f18f8Sesaxe 	 * Notify all registered classes that the
622fb2f18f8Sesaxe 	 * CPU is about to enter the CPU partition
623fb2f18f8Sesaxe 	 */
624fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
625fb2f18f8Sesaxe 		PG_CPUPART_IN(i, cp, pp);
626fb2f18f8Sesaxe }
627fb2f18f8Sesaxe 
628fb2f18f8Sesaxe /*
629fb2f18f8Sesaxe  * Invoked when the CPU is about to move out of the partition
630fb2f18f8Sesaxe  * This routine may block.
631fb2f18f8Sesaxe  */
632fb2f18f8Sesaxe /*ARGSUSED*/
633fb2f18f8Sesaxe void
634fb2f18f8Sesaxe pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
635fb2f18f8Sesaxe {
636fb2f18f8Sesaxe 	int	i;
637fb2f18f8Sesaxe 
638fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
639fb2f18f8Sesaxe 
640fb2f18f8Sesaxe 	/*
641fb2f18f8Sesaxe 	 * Notify all registered classes that the
642fb2f18f8Sesaxe 	 * CPU is about to leave the CPU partition
643fb2f18f8Sesaxe 	 */
644fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
645fb2f18f8Sesaxe 		PG_CPUPART_OUT(i, cp, pp);
646fb2f18f8Sesaxe }
647fb2f18f8Sesaxe 
648fb2f18f8Sesaxe /*
649fb2f18f8Sesaxe  * Invoked when the CPU is *moving* partitions.
650fb2f18f8Sesaxe  *
651fb2f18f8Sesaxe  * This routine may not block, as it is called from paused CPUs
652fb2f18f8Sesaxe  * context.
653fb2f18f8Sesaxe  */
654fb2f18f8Sesaxe void
655fb2f18f8Sesaxe pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
656fb2f18f8Sesaxe {
657fb2f18f8Sesaxe 	int	i;
658fb2f18f8Sesaxe 
659fb2f18f8Sesaxe 	ASSERT(MUTEX_HELD(&cpu_lock));
660fb2f18f8Sesaxe 
661fb2f18f8Sesaxe 	/*
662fb2f18f8Sesaxe 	 * Notify all registered classes that the
663fb2f18f8Sesaxe 	 * CPU is about to leave the CPU partition
664fb2f18f8Sesaxe 	 */
665fb2f18f8Sesaxe 	for (i = 0; i < pg_nclasses; i++)
666fb2f18f8Sesaxe 		PG_CPUPART_MOVE(i, cp, oldpp, newpp);
667fb2f18f8Sesaxe }
668fb2f18f8Sesaxe 
669fb2f18f8Sesaxe /*
670*0e751525SEric Saxe  * Return a class specific string describing a policy implemented
671*0e751525SEric Saxe  * across this PG
672*0e751525SEric Saxe  */
673*0e751525SEric Saxe char *
674*0e751525SEric Saxe pg_policy_name(pg_t *pg)
675*0e751525SEric Saxe {
676*0e751525SEric Saxe 	char *str;
677*0e751525SEric Saxe 	if ((str = PG_POLICY_NAME(pg)) != NULL)
678*0e751525SEric Saxe 		return (str);
679*0e751525SEric Saxe 
680*0e751525SEric Saxe 	return ("N/A");
681*0e751525SEric Saxe }
682*0e751525SEric Saxe 
683*0e751525SEric Saxe /*
684fb2f18f8Sesaxe  * Provide the specified CPU a bootstrap pg
685fb2f18f8Sesaxe  * This is needed to allow sane behaviour if any PG consuming
686fb2f18f8Sesaxe  * code needs to deal with a partially initialized CPU
687fb2f18f8Sesaxe  */
688fb2f18f8Sesaxe void
689fb2f18f8Sesaxe pg_cpu_bootstrap(cpu_t *cp)
690fb2f18f8Sesaxe {
691fb2f18f8Sesaxe 	cp->cpu_pg = &bootstrap_pg_data;
692fb2f18f8Sesaxe }
693fb2f18f8Sesaxe 
694fb2f18f8Sesaxe /*ARGSUSED*/
695fb2f18f8Sesaxe static pg_t *
696fb2f18f8Sesaxe pg_alloc_default(pg_class_t class)
697fb2f18f8Sesaxe {
698fb2f18f8Sesaxe 	return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
699fb2f18f8Sesaxe }
700fb2f18f8Sesaxe 
701fb2f18f8Sesaxe /*ARGSUSED*/
702fb2f18f8Sesaxe static void
703fb2f18f8Sesaxe pg_free_default(struct pg *pg)
704fb2f18f8Sesaxe {
705fb2f18f8Sesaxe 	kmem_free(pg, sizeof (pg_t));
706fb2f18f8Sesaxe }
707*0e751525SEric Saxe 
/*
 * Callback that does nothing; it fills the default event callback
 * vector. It is deliberately left without a prototype, since it stands
 * in for callbacks that take differing argument lists.
 */
static void
pg_null_op()
{
	/* nothing to do */
}
712*0e751525SEric Saxe 
713*0e751525SEric Saxe /*
714*0e751525SEric Saxe  * Invoke the "thread switch" callback for each of the CPU's PGs
715*0e751525SEric Saxe  * This is invoked from the dispatcher swtch() routine, which is called
716*0e751525SEric Saxe  * when a thread running an a CPU should switch to another thread.
717*0e751525SEric Saxe  * "cp" is the CPU on which the thread switch is happening
718*0e751525SEric Saxe  * "now" is an unscaled hrtime_t timestamp taken in swtch()
719*0e751525SEric Saxe  * "old" and "new" are the outgoing and incoming threads, respectively.
720*0e751525SEric Saxe  */
721*0e751525SEric Saxe void
722*0e751525SEric Saxe pg_ev_thread_swtch(struct cpu *cp, hrtime_t now, kthread_t *old, kthread_t *new)
723*0e751525SEric Saxe {
724*0e751525SEric Saxe 	int	i, sz;
725*0e751525SEric Saxe 	group_t	*grp;
726*0e751525SEric Saxe 	pg_t	*pg;
727*0e751525SEric Saxe 
728*0e751525SEric Saxe 	grp = &cp->cpu_pg->pgs;
729*0e751525SEric Saxe 	sz = GROUP_SIZE(grp);
730*0e751525SEric Saxe 	for (i = 0; i < sz; i++) {
731*0e751525SEric Saxe 		pg = GROUP_ACCESS(grp, i);
732*0e751525SEric Saxe 		pg->pg_cb.thread_swtch(pg, cp, now, old, new);
733*0e751525SEric Saxe 	}
734*0e751525SEric Saxe }
735*0e751525SEric Saxe 
736*0e751525SEric Saxe /*
737*0e751525SEric Saxe  * Invoke the "thread remain" callback for each of the CPU's PGs.
738*0e751525SEric Saxe  * This is called from the dispatcher's swtch() routine when a thread
739*0e751525SEric Saxe  * running on the CPU "cp" is switching to itself, which can happen as an
740*0e751525SEric Saxe  * artifact of the thread's timeslice expiring.
741*0e751525SEric Saxe  */
742*0e751525SEric Saxe void
743*0e751525SEric Saxe pg_ev_thread_remain(struct cpu *cp, kthread_t *t)
744*0e751525SEric Saxe {
745*0e751525SEric Saxe 	int	i, sz;
746*0e751525SEric Saxe 	group_t	*grp;
747*0e751525SEric Saxe 	pg_t	*pg;
748*0e751525SEric Saxe 
749*0e751525SEric Saxe 	grp = &cp->cpu_pg->pgs;
750*0e751525SEric Saxe 	sz = GROUP_SIZE(grp);
751*0e751525SEric Saxe 	for (i = 0; i < sz; i++) {
752*0e751525SEric Saxe 		pg = GROUP_ACCESS(grp, i);
753*0e751525SEric Saxe 		pg->pg_cb.thread_remain(pg, cp, t);
754*0e751525SEric Saxe 	}
755*0e751525SEric Saxe }
756