xref: /titanic_52/usr/src/uts/common/os/pghw.c (revision 6a1af1a67532df169a657cce07140be64bdea084)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/systm.h>
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/thread.h>
29 #include <sys/cpuvar.h>
30 #include <sys/kmem.h>
31 #include <sys/cmn_err.h>
32 #include <sys/policy.h>
33 #include <sys/group.h>
34 #include <sys/pg.h>
35 #include <sys/pghw.h>
36 #include <sys/cpu_pm.h>
37 #include <sys/cap_util.h>
38 
39 /*
40  * Processor Groups: Hardware sharing relationship layer
41  *
42  * This file implements an extension to Processor Groups to capture
43  * hardware sharing relationships existing between logical CPUs. Examples of
 * hardware sharing relationships include shared caches on some CMT
 * processor architectures, or shared local memory controllers on NUMA
46  * based system architectures.
47  *
48  * The pghw_t structure represents the extended PG. The first member
49  * of the structure is the generic pg_t with the pghw specific members
50  * following. The generic pg_t *must* remain the first member of the
51  * structure as the code uses casting of structure references to access
52  * the generic pg_t structure elements.
53  *
54  * In addition to the generic CPU grouping, physical PGs have a hardware
55  * sharing relationship enumerated "type", and an instance id. The enumerated
56  * type is defined by the pghw_type_t enumeration, while the instance id
57  * uniquely identifies the sharing instance from among others of the same
58  * hardware sharing type.
59  *
60  * The physical PGs are organized into an overall hierarchy, and are tracked
61  * in a number of different per CPU, and per pghw_type_t type groups.
62  * As an example:
63  *
64  * -------------
65  * | pg_hw     |
66  * | (group_t) |
67  * -------------
68  *  ||                          ============================
69  *  ||\\-----------------------//       \\                 \\
70  *  ||  | hwset (PGC_HW_CHIP) |        -------------      -------------
71  *  ||  | (group_t)           |        | pghw_t    |      | pghw_t    |
72  *  ||  -----------------------        | chip 0    |      | chip 1    |
73  *  ||                                 -------------      -------------
74  *  ||                                 \\  \\  \\  \\     \\  \\  \\  \\
75  *  ||                                  cpu cpu cpu cpu    cpu cpu cpu cpu
76  *  ||
77  *  ||                          ============================
78  *  ||\\-----------------------//       \\                 \\
79  *  ||  | hwset (PGC_HW_IPIPE)|        -------------      -------------
80  *  ||  | (group_t)           |        | pghw_t    |      | pghw_t    |
81  *  ||  -----------------------        | ipipe 0   |      | ipipe 1   |
82  *  ||                                 -------------      -------------
83  *  ||                                 \\  \\             \\  \\
84  *  ||                                  cpu cpu            cpu cpu
85  *  ...
86  *
87  *
 * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group
 * of physical PGs of the same hardware sharing type. Within each hwset, the
 * PG's instance id uniquely identifies the grouping relationship among other
91  * groupings of the same sharing type. The instance id for a grouping is
92  * platform defined, and in some cases may be used by platform code as a handle
93  * to search for a particular relationship instance.
94  *
95  * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs
96  * that participate in the sharing relationship. Each CPU also has associated
97  * with it a grouping tracking the PGs in which the CPU belongs. This can be
98  * used to iterate over the various relationships in which the CPU participates
99  * (the CPU's chip, cache, lgroup, etc.).
100  *
101  * The hwsets are created dynamically as new hardware sharing relationship types
102  * are instantiated. They are never destroyed, as once a given relationship
103  * type appears in the system, it is quite likely that at least one instance of
104  * that relationship will always persist as long as the system is running.
105  */
106 
/*
 * Group of hwsets, indexed by pghw_type_t. It remains NULL until the first
 * call to pghw_set_create(), which allocates it and expands it to
 * PGHW_NUM_COMPONENTS entries.
 */
static group_t		*pg_hw;		/* top level pg hw group */
108 
/*
 * Physical PG kstats
 *
 * One instance of this structure serves as the data buffer for every "pg"
 * kstat created by pghw_kstat_create(): each kstat's ks_data points at it and
 * pghw_kstat_update() refills it from the PG being read.
 */
struct pghw_kstat {
	kstat_named_t	pg_id;		/* PG ID */
	kstat_named_t	pg_class;	/* PG class name */
	kstat_named_t	pg_ncpus;	/* number of CPUs in the PG */
	kstat_named_t	pg_instance_id;	/* sharing relationship instance id */
	kstat_named_t	pg_hw;		/* sharing relationship name */
	kstat_named_t	pg_policy;	/* PG policy name */
} pghw_kstat = {
	{ "id",			KSTAT_DATA_INT32 },
	{ "pg_class",		KSTAT_DATA_STRING },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "instance_id",	KSTAT_DATA_UINT32 },
	{ "hardware",		KSTAT_DATA_STRING },
	{ "policy",		KSTAT_DATA_STRING },
};

/*
 * Installed as ks_lock for both kinds of kstat created in this file.
 */
kmutex_t		pghw_kstat_lock;
129 
/*
 * Capacity and Utilization PG kstats
 *
 * These kstats are updated one at a time, so we can have a single scratch space
 * to fill the data.
 *
 * kstat fields:
 *
 *   pg_id		PG ID for PG described by this kstat
 *
 *   pg_parent_id	Parent PG ID. The value -1 means "no parent".
 *
 *   pg_ncpus		Number of CPUs within this PG
 *
 *   pg_cpus		String describing CPUs within this PG
 *
 *   pg_relationship	Name of sharing relationship for this PG
 *
 *   pg_generation	Generation value that increases whenever any CPU leaves
 *			  or joins PG. Two kstat snapshots for the same
 *			  CPU may only be compared if they have the same
 *			  generation
 *
 *   pg_hw_util		Running value of PG utilization for the sharing
 *			  relationship
 *
 *   pg_hw_util_time_running
 *			Total time spent collecting CU data. The time may be
 *			less than wall time if CU counters were stopped for
 *			some time.
 *
 *   pg_hw_util_time_stopped Total time the CU counters were stopped.
 *
 *   pg_hw_util_rate	Utilization rate, expressed in operations per second.
 *
 *   pg_hw_util_rate_max Maximum observed value of utilization rate.
 *
 * The five pg_hw_util* values are reported as zero to callers that do not
 * pass the secpolicy_cpc_cpu() check in pghw_cu_kstat_update().
 */
struct pghw_cu_kstat {
	kstat_named_t	pg_id;
	kstat_named_t	pg_parent_id;
	kstat_named_t	pg_ncpus;
	kstat_named_t	pg_generation;
	kstat_named_t	pg_hw_util;
	kstat_named_t	pg_hw_util_time_running;
	kstat_named_t	pg_hw_util_time_stopped;
	kstat_named_t	pg_hw_util_rate;
	kstat_named_t	pg_hw_util_rate_max;
	kstat_named_t	pg_cpus;
	kstat_named_t	pg_relationship;
} pghw_cu_kstat = {
	{ "pg_id",		KSTAT_DATA_INT32 },
	{ "parent_pg_id",	KSTAT_DATA_INT32 },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "generation",		KSTAT_DATA_UINT32   },
	{ "hw_util",		KSTAT_DATA_UINT64   },
	{ "hw_util_time_running",	KSTAT_DATA_UINT64   },
	{ "hw_util_time_stopped",	KSTAT_DATA_UINT64   },
	{ "hw_util_rate",	KSTAT_DATA_UINT64   },
	{ "hw_util_rate_max",	KSTAT_DATA_UINT64   },
	{ "cpus",		KSTAT_DATA_STRING   },
	{ "relationship",	KSTAT_DATA_STRING   },
};
192 
/*
 * Calculate the string size to represent NCPUS. Allow 5 digits for each CPU ID
 * plus one more character per CPU (a separator, or the terminating NUL after
 * the last entry). This is only an estimate, since we try to compress CPU
 * ranges as x-y. In the worst case the string representation of CPUs may be
 * truncated.
 */
#define	CPUSTR_LEN(ncpus) ((ncpus) * 6)

/*
 * Maximum length of the string that represents list of CPUs.
 *
 * Set on the first call to pghw_kstat_create() to CPUSTR_LEN(max_ncpus). It
 * is the amount of space reserved in each CU kstat for the CPU list and the
 * upper bound applied by pghw_cpulist_alloc() to per-PG string allocations.
 */
static int pg_cpulist_maxlen = 0;
205 
206 static void		pghw_kstat_create(pghw_t *);
207 static int		pghw_kstat_update(kstat_t *, int);
208 static int		pghw_cu_kstat_update(kstat_t *, int);
209 static int		cpu2id(void *);
210 
211 /*
212  * hwset operations
213  */
214 static group_t		*pghw_set_create(pghw_type_t);
215 static void		pghw_set_add(group_t *, pghw_t *);
216 static void		pghw_set_remove(group_t *, pghw_t *);
217 
218 static void		pghw_cpulist_alloc(pghw_t *);
219 static int		cpu2id(void *);
220 static pgid_t		pghw_parent_id(pghw_t *);
221 
222 /*
223  * Initialize the physical portion of a hardware PG
224  */
225 void
226 pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw)
227 {
228 	group_t		*hwset;
229 
230 	if ((hwset = pghw_set_lookup(hw)) == NULL) {
231 		/*
232 		 * Haven't seen this hardware type yet
233 		 */
234 		hwset = pghw_set_create(hw);
235 	}
236 
237 	pghw_set_add(hwset, pg);
238 	pg->pghw_hw = hw;
239 	pg->pghw_generation = 0;
240 	pg->pghw_instance =
241 	    pg_plat_hw_instance_id(cp, hw);
242 	pghw_kstat_create(pg);
243 
244 	/*
245 	 * Hardware sharing relationship specific initialization
246 	 */
247 	switch (pg->pghw_hw) {
248 	case PGHW_POW_ACTIVE:
249 		pg->pghw_handle =
250 		    (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_ACTIVE);
251 		break;
252 	case PGHW_POW_IDLE:
253 		pg->pghw_handle =
254 		    (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_IDLE);
255 		break;
256 	default:
257 		pg->pghw_handle = (pghw_handle_t)NULL;
258 	}
259 }
260 
261 /*
262  * Teardown the physical portion of a physical PG
263  */
264 void
265 pghw_fini(pghw_t *pg)
266 {
267 	group_t		*hwset;
268 
269 	pghw_cmt_fini(pg);
270 
271 	hwset = pghw_set_lookup(pg->pghw_hw);
272 	ASSERT(hwset != NULL);
273 
274 	pghw_set_remove(hwset, pg);
275 	pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON;
276 	pg->pghw_hw = (pghw_type_t)-1;
277 
278 	if (pg->pghw_kstat != NULL)
279 		kstat_delete(pg->pghw_kstat);
280 
281 }
282 
283 /*
284  * PG is removed from CMT hierarchy
285  */
286 void
287 pghw_cmt_fini(pghw_t *pg)
288 {
289 	/*
290 	 * Destroy string representation of CPUs
291 	 */
292 	if (pg->pghw_cpulist != NULL) {
293 		kmem_free(pg->pghw_cpulist,
294 		    pg->pghw_cpulist_len);
295 		pg->pghw_cpulist = NULL;
296 	}
297 
298 	/*
299 	 * Destroy CU kstats
300 	 */
301 	if (pg->pghw_cu_kstat != NULL) {
302 		kstat_delete(pg->pghw_cu_kstat);
303 		pg->pghw_cu_kstat = NULL;
304 	}
305 }
306 
307 /*
308  * Find an existing physical PG in which to place
309  * the given CPU for the specified hardware sharing
310  * relationship
311  */
312 pghw_t *
313 pghw_place_cpu(cpu_t *cp, pghw_type_t hw)
314 {
315 	group_t		*hwset;
316 
317 	if ((hwset = pghw_set_lookup(hw)) == NULL) {
318 		return (NULL);
319 	}
320 
321 	return ((pghw_t *)pg_cpu_find_pg(cp, hwset));
322 }
323 
324 /*
325  * Find the pg representing the hw sharing relationship in which
326  * cp belongs
327  */
328 pghw_t *
329 pghw_find_pg(cpu_t *cp, pghw_type_t hw)
330 {
331 	group_iter_t	i;
332 	pghw_t	*pg;
333 
334 	group_iter_init(&i);
335 	while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) {
336 		if (pg->pghw_hw == hw)
337 			return (pg);
338 	}
339 	return (NULL);
340 }
341 
342 /*
343  * Find the PG of the given hardware sharing relationship
344  * type with the given instance id
345  */
346 pghw_t *
347 pghw_find_by_instance(id_t id, pghw_type_t hw)
348 {
349 	group_iter_t	i;
350 	group_t		*set;
351 	pghw_t		*pg;
352 
353 	set = pghw_set_lookup(hw);
354 	if (!set)
355 		return (NULL);
356 
357 	group_iter_init(&i);
358 	while ((pg = group_iterate(set, &i)) != NULL) {
359 		if (pg->pghw_instance == id)
360 			return (pg);
361 	}
362 	return (NULL);
363 }
364 
365 /*
366  * CPUs physical ID cache creation / destruction
367  * The cache's elements are initialized to the CPU's id
368  */
369 void
370 pghw_physid_create(cpu_t *cp)
371 {
372 	int	i;
373 
374 	cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP);
375 
376 	for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) {
377 		((id_t *)cp->cpu_physid)[i] = cp->cpu_id;
378 	}
379 }
380 
381 void
382 pghw_physid_destroy(cpu_t *cp)
383 {
384 	if (cp->cpu_physid) {
385 		kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
386 		cp->cpu_physid = NULL;
387 	}
388 }
389 
390 /*
391  * Create a new, empty hwset.
392  * This routine may block, and must not be called from any
393  * paused CPU context.
394  */
395 static group_t	*
396 pghw_set_create(pghw_type_t hw)
397 {
398 	group_t	*g;
399 	int	ret;
400 
401 	/*
402 	 * Create the top level PG hw group if it doesn't already exist
403 	 * This is a "set" of hardware sets, that is ordered (and indexed)
404 	 * by the pghw_type_t enum.
405 	 */
406 	if (pg_hw == NULL) {
407 		pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP);
408 		group_create(pg_hw);
409 		group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS);
410 	}
411 
412 	/*
413 	 * Create the new hwset
414 	 * Add it to the top level pg_hw group.
415 	 */
416 	g = kmem_alloc(sizeof (group_t), KM_SLEEP);
417 	group_create(g);
418 
419 	ret = group_add_at(pg_hw, g, (uint_t)hw);
420 	ASSERT(ret == 0);
421 
422 	return (g);
423 }
424 
425 /*
426  * Find the hwset associated with the given hardware sharing type
427  */
428 group_t *
429 pghw_set_lookup(pghw_type_t hw)
430 {
431 	group_t	*hwset;
432 
433 	if (pg_hw == NULL)
434 		return (NULL);
435 
436 	hwset = GROUP_ACCESS(pg_hw, (uint_t)hw);
437 	return (hwset);
438 }
439 
440 /*
441  * Add a PG to a hwset
442  */
443 static void
444 pghw_set_add(group_t *hwset, pghw_t *pg)
445 {
446 	(void) group_add(hwset, pg, GRP_RESIZE);
447 }
448 
449 /*
450  * Remove a PG from a hwset
451  */
452 static void
453 pghw_set_remove(group_t *hwset, pghw_t *pg)
454 {
455 	int result;
456 
457 	result = group_remove(hwset, pg, GRP_RESIZE);
458 	ASSERT(result == 0);
459 }
460 
461 /*
462  * Return a string name given a pg_hw sharing type
463  */
464 char *
465 pghw_type_string(pghw_type_t hw)
466 {
467 	switch (hw) {
468 	case PGHW_IPIPE:
469 		return ("Integer Pipeline");
470 	case PGHW_CACHE:
471 		return ("Cache");
472 	case PGHW_FPU:
473 		return ("Floating Point Unit");
474 	case PGHW_MPIPE:
475 		return ("Data Pipe to memory");
476 	case PGHW_CHIP:
477 		return ("Socket");
478 	case PGHW_MEMORY:
479 		return ("Memory");
480 	case PGHW_POW_ACTIVE:
481 		return ("CPU PM Active Power Domain");
482 	case PGHW_POW_IDLE:
483 		return ("CPU PM Idle Power Domain");
484 	default:
485 		return ("unknown");
486 	}
487 }
488 
489 /*
490  * Create / Update routines for PG hw kstats
491  *
492  * It is the intention of these kstats to provide some level
493  * of informational / debugging observability into the types
494  * and nature of the system's detected hardware sharing relationships
495  */
496 void
497 pghw_kstat_create(pghw_t *pg)
498 {
499 	char *sharing = pghw_type_string(pg->pghw_hw);
500 	char name[KSTAT_STRLEN + 1];
501 
502 	/*
503 	 * Canonify PG name to conform to kstat name rules
504 	 */
505 	(void) strncpy(name, pghw_type_string(pg->pghw_hw), KSTAT_STRLEN + 1);
506 	strident_canon(name, KSTAT_STRLEN + 1);
507 
508 	/*
509 	 * Create a hardware performance kstat
510 	 */
511 	if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id,
512 	    "pg", "pg",
513 	    KSTAT_TYPE_NAMED,
514 	    sizeof (pghw_kstat) / sizeof (kstat_named_t),
515 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
516 		/* Class string, hw string, and policy string */
517 		pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX;
518 		pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
519 		pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
520 		pg->pghw_kstat->ks_lock = &pghw_kstat_lock;
521 		pg->pghw_kstat->ks_data = &pghw_kstat;
522 		pg->pghw_kstat->ks_update = pghw_kstat_update;
523 		pg->pghw_kstat->ks_private = pg;
524 		kstat_install(pg->pghw_kstat);
525 	}
526 
527 	if (pg_cpulist_maxlen == 0)
528 		pg_cpulist_maxlen = CPUSTR_LEN(max_ncpus);
529 
530 	/*
531 	 * Create a physical pg kstat
532 	 */
533 	if ((pg->pghw_cu_kstat = kstat_create("pg_hw_perf", ((pg_t *)pg)->pg_id,
534 	    name, "processor_group",
535 	    KSTAT_TYPE_NAMED,
536 	    sizeof (pghw_cu_kstat) / sizeof (kstat_named_t),
537 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
538 		pg->pghw_cu_kstat->ks_lock = &pghw_kstat_lock;
539 		pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat;
540 		pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update;
541 		pg->pghw_cu_kstat->ks_private = pg;
542 		pg->pghw_cu_kstat->ks_data_size += strlen(sharing) + 1;
543 		/* Allow space for CPU strings */
544 		pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
545 		pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen;
546 		kstat_install(pg->pghw_cu_kstat);
547 	}
548 }
549 
550 int
551 pghw_kstat_update(kstat_t *ksp, int rw)
552 {
553 	struct pghw_kstat	*pgsp = &pghw_kstat;
554 	pghw_t			*pg = ksp->ks_private;
555 
556 	if (rw == KSTAT_WRITE)
557 		return (EACCES);
558 
559 	pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id;
560 	pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
561 	pgsp->pg_instance_id.value.ui32 = pg->pghw_instance;
562 	kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name);
563 	kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw));
564 	kstat_named_setstr(&pgsp->pg_policy, pg_policy_name((pg_t *)pg));
565 	return (0);
566 }
567 
568 int
569 pghw_cu_kstat_update(kstat_t *ksp, int rw)
570 {
571 	struct pghw_cu_kstat	*pgsp = &pghw_cu_kstat;
572 	pghw_t			*pg = ksp->ks_private;
573 	pghw_util_t		*hw_util = &pg->pghw_stats;
574 	boolean_t		has_cpc_privilege;
575 
576 	if (rw == KSTAT_WRITE)
577 		return (EACCES);
578 
579 	/*
580 	 * Check whether the caller has priv_cpc_cpu privilege. If he doesn't,
581 	 * he will not get hardware utilization data.
582 	 */
583 
584 	has_cpc_privilege = (secpolicy_cpc_cpu(crgetcred()) == 0);
585 
586 	pgsp->pg_id.value.i32 = ((pg_t *)pg)->pg_id;
587 	pgsp->pg_parent_id.value.i32 = (int)pghw_parent_id(pg);
588 
589 	pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
590 
591 	/*
592 	 * Allocate memory for the string representing the list of CPUs in PG.
593 	 * This memory should persist past the call to pghw_cu_kstat_update()
594 	 * since the kstat snapshot routine will reference this memory.
595 	 */
596 	pghw_cpulist_alloc(pg);
597 
598 	if (pg->pghw_kstat_gen != pg->pghw_generation) {
599 		/*
600 		 * PG kstat generation number is out of sync with PG's
601 		 * generation mumber. It means that some CPUs could have joined
602 		 * or left PG and it is not possible to compare the numbers
603 		 * obtained before and after the generation change.
604 		 *
605 		 * Reset the maximum utilization rate and start computing it
606 		 * from scratch.
607 		 */
608 		hw_util->pghw_util = 0;
609 		hw_util->pghw_rate_max = 0;
610 		pg->pghw_kstat_gen = pg->pghw_generation;
611 	}
612 
613 	/*
614 	 * We can't block on CPU lock because when PG is destroyed (under
615 	 * cpu_lock) it tries to delete this kstat and it will wait for us to
616 	 * complete which will never happen since we are waiting for cpu_lock to
617 	 * drop. Deadlocks are fun!
618 	 */
619 	if (mutex_tryenter(&cpu_lock)) {
620 		if (pg->pghw_cpulist != NULL &&
621 		    *(pg->pghw_cpulist) == '\0') {
622 			(void) group2intlist(&(((pg_t *)pg)->pg_cpus),
623 			    pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id);
624 		}
625 
626 		if (has_cpc_privilege)
627 			cu_pg_update(pg);
628 
629 		mutex_exit(&cpu_lock);
630 	}
631 
632 	pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen;
633 	if (pg->pghw_cpulist != NULL)
634 		kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist);
635 	else
636 		kstat_named_setstr(&pgsp->pg_cpus, "");
637 
638 	kstat_named_setstr(&pgsp->pg_relationship,
639 	    pghw_type_string(pg->pghw_hw));
640 
641 	if (has_cpc_privilege) {
642 		pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util;
643 		pgsp->pg_hw_util_time_running.value.ui64 =
644 		    hw_util->pghw_time_running;
645 		pgsp->pg_hw_util_time_stopped.value.ui64 =
646 		    hw_util->pghw_time_stopped;
647 		pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate;
648 		pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max;
649 	} else {
650 		pgsp->pg_hw_util.value.ui64 = 0;
651 		pgsp->pg_hw_util_time_running.value.ui64 = 0;
652 		pgsp->pg_hw_util_time_stopped.value.ui64 = 0;
653 		pgsp->pg_hw_util_rate.value.ui64 = 0;
654 		pgsp->pg_hw_util_rate_max.value.ui64 = 0;
655 	}
656 
657 	return (0);
658 }
659 
660 /*
661  * Update the string representation of CPUs in PG (pg->pghw_cpulist).
662  * The string representation is used for kstats.
663  *
664  * The string is allocated if it has not already been or if it is already
665  * allocated and PG has more CPUs now. If PG has smaller or equal number of
666  * CPUs, but the actual CPUs may have changed, the string is reset to the empty
667  * string causes the string representation to be recreated. The pghw_generation
668  * field is used to detect whether CPUs within the pg may have changed.
669  */
670 static void
671 pghw_cpulist_alloc(pghw_t *pg)
672 {
673 	uint_t	ncpus = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
674 	size_t	len = CPUSTR_LEN(ncpus);
675 
676 	/*
677 	 * If the pghw_cpulist string is already allocated we need to make sure
678 	 * that it has sufficient length. Also if the set of CPUs may have
679 	 * changed, we need to re-generate the string.
680 	 */
681 	if (pg->pghw_cpulist != NULL &&
682 	    pg->pghw_kstat_gen != pg->pghw_generation) {
683 		if (len <= pg->pghw_cpulist_len) {
684 			/*
685 			 * There is sufficient space in the pghw_cpulist for
686 			 * the new set of CPUs. Just clear the string to trigger
687 			 * re-generation of list of CPUs
688 			 */
689 			*(pg->pghw_cpulist) = '\0';
690 		} else {
691 			/*
692 			 * There is, potentially, insufficient space in
693 			 * pghw_cpulist, so reallocate the string.
694 			 */
695 			ASSERT(strlen(pg->pghw_cpulist) < pg->pghw_cpulist_len);
696 			kmem_free(pg->pghw_cpulist, pg->pghw_cpulist_len);
697 			pg->pghw_cpulist = NULL;
698 			pg->pghw_cpulist_len = 0;
699 		}
700 	}
701 
702 	if (pg->pghw_cpulist == NULL) {
703 		/*
704 		 * Allocate space to hold cpulist.
705 		 *
706 		 * Length can not be bigger that the maximum space we have
707 		 * allowed for the kstat buffer
708 		 */
709 		if (len > pg_cpulist_maxlen)
710 			len = pg_cpulist_maxlen;
711 		if (len > 0) {
712 			pg->pghw_cpulist = kmem_zalloc(len, KM_NOSLEEP);
713 			if (pg->pghw_cpulist != NULL)
714 				pg->pghw_cpulist_len = len;
715 		}
716 	}
717 }
718 
719 static int
720 cpu2id(void *v)
721 {
722 	cpu_t *cp = (cpu_t *)v;
723 
724 	ASSERT(v != NULL);
725 
726 	return (cp->cpu_id);
727 }
728 
729 /*
730  * Return parent ID or -1 if there is no parent.
731  * All hardware PGs are currently also CMT PGs, but for safety we check the
732  * class matches cmt before we upcast the pghw pointer to pg_cmt_t.
733  */
734 static pgid_t
735 pghw_parent_id(pghw_t *pghw)
736 {
737 	pg_t *pg = (pg_t *)pghw;
738 	pgid_t parent_id = -1;
739 
740 	if (pg != NULL && strcmp(pg->pg_class->pgc_name, "cmt") == 0) {
741 		pg_cmt_t *cmt = (pg_cmt_t *)pg;
742 		pg_t *parent = (pg_t *)cmt->cmt_parent;
743 		if (parent != NULL)
744 			parent_id = parent->pg_id;
745 	}
746 
747 	return (parent_id);
748 }
749