xref: /illumos-gate/usr/src/uts/common/os/pghw.c (revision 48bbca816818409505a6e214d0911fda44e622e3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2016 by Delphix. All rights reserved.
24  */
25 
26 #include <sys/systm.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/thread.h>
30 #include <sys/cpuvar.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/policy.h>
34 #include <sys/group.h>
35 #include <sys/pg.h>
36 #include <sys/pghw.h>
37 #include <sys/cpu_pm.h>
38 #include <sys/cap_util.h>
39 
40 /*
41  * Processor Groups: Hardware sharing relationship layer
42  *
43  * This file implements an extension to Processor Groups to capture
44  * hardware sharing relationships existing between logical CPUs. Examples of
45  * hardware sharing relationships include shared caches on some CMT
 * processor architectures, or shared local memory controllers on NUMA
47  * based system architectures.
48  *
49  * The pghw_t structure represents the extended PG. The first member
50  * of the structure is the generic pg_t with the pghw specific members
51  * following. The generic pg_t *must* remain the first member of the
52  * structure as the code uses casting of structure references to access
53  * the generic pg_t structure elements.
54  *
55  * In addition to the generic CPU grouping, physical PGs have a hardware
56  * sharing relationship enumerated "type", and an instance id. The enumerated
57  * type is defined by the pghw_type_t enumeration, while the instance id
58  * uniquely identifies the sharing instance from among others of the same
59  * hardware sharing type.
60  *
61  * The physical PGs are organized into an overall hierarchy, and are tracked
62  * in a number of different per CPU, and per pghw_type_t type groups.
63  * As an example:
64  *
65  * -------------
66  * | pg_hw     |
67  * | (group_t) |
68  * -------------
69  *  ||                          ============================
70  *  ||\\-----------------------//       \\                 \\
71  *  ||  | hwset (PGC_HW_CHIP) |        -------------      -------------
72  *  ||  | (group_t)           |        | pghw_t    |      | pghw_t    |
73  *  ||  -----------------------        | chip 0    |      | chip 1    |
74  *  ||                                 -------------      -------------
75  *  ||                                 \\  \\  \\  \\     \\  \\  \\  \\
76  *  ||                                  cpu cpu cpu cpu    cpu cpu cpu cpu
77  *  ||
78  *  ||                          ============================
79  *  ||\\-----------------------//       \\                 \\
80  *  ||  | hwset (PGC_HW_IPIPE)|        -------------      -------------
81  *  ||  | (group_t)           |        | pghw_t    |      | pghw_t    |
82  *  ||  -----------------------        | ipipe 0   |      | ipipe 1   |
83  *  ||                                 -------------      -------------
84  *  ||                                 \\  \\             \\  \\
85  *  ||                                  cpu cpu            cpu cpu
86  *  ...
87  *
88  *
 * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group
 * of physical PGs of the same hardware sharing type. Within each hwset, the
 * PG's instance id uniquely identifies the grouping relationship among other
92  * groupings of the same sharing type. The instance id for a grouping is
93  * platform defined, and in some cases may be used by platform code as a handle
94  * to search for a particular relationship instance.
95  *
96  * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs
97  * that participate in the sharing relationship. Each CPU also has associated
98  * with it a grouping tracking the PGs in which the CPU belongs. This can be
99  * used to iterate over the various relationships in which the CPU participates
100  * (the CPU's chip, cache, lgroup, etc.).
101  *
102  * The hwsets are created dynamically as new hardware sharing relationship types
103  * are instantiated. They are never destroyed, as once a given relationship
104  * type appears in the system, it is quite likely that at least one instance of
105  * that relationship will always persist as long as the system is running.
106  */
107 
/*
 * Top level PG hardware group: a group of hwsets indexed by pghw_type_t.
 * Allocated lazily by pghw_set_create(); it stays NULL until the first hwset
 * is created, which pghw_set_lookup() reports as "no such hwset".
 */
static group_t		*pg_hw;		/* top level pg hw group */
109 
/*
 * Physical PG kstats
 *
 * A single static instance serves as ks_data for the "pg" kstat of every PG
 * (see pghw_kstat_create()); pghw_kstat_update() refills it from the PG
 * stored in ks_private each time a kstat is read.
 */
struct pghw_kstat {
	kstat_named_t	pg_id;		/* PG id */
	kstat_named_t	pg_class;	/* name of the PG's class */
	kstat_named_t	pg_ncpus;	/* number of CPUs in the PG */
	kstat_named_t	pg_instance_id;	/* sharing relationship instance id */
	kstat_named_t	pg_hw;		/* sharing relationship name */
	kstat_named_t	pg_policy;	/* PG policy name */
} pghw_kstat = {
	{ "id",			KSTAT_DATA_INT32 },
	{ "pg_class",		KSTAT_DATA_STRING },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "instance_id",	KSTAT_DATA_UINT32 },
	{ "hardware",		KSTAT_DATA_STRING },
	{ "policy",		KSTAT_DATA_STRING },
};
128 
/*
 * Installed as ks_lock on every kstat created by pghw_kstat_create(). Both
 * kstat flavors fill a shared static scratch structure from their update
 * routines.
 */
kmutex_t		pghw_kstat_lock;
130 
131 /*
132  * Capacity and Utilization PG kstats
133  *
134  * These kstats are updated one at a time, so we can have a single scratch space
135  * to fill the data.
136  *
137  * kstat fields:
138  *
139  *   pg_id		PG ID for PG described by this kstat
140  *
 *   pg_parent_id	Parent PG ID. The value -1 means "no parent".
142  *
143  *   pg_ncpus		Number of CPUs within this PG
144  *
145  *   pg_cpus		String describing CPUs within this PG
146  *
147  *   pg_relationship	Name of sharing relationship for this PG
148  *
149  *   pg_generation	Generation value that increases whenever any CPU leaves
150  *			  or joins PG. Two kstat snapshots for the same
151  *			  CPU may only be compared if they have the same
152  *			  generation
153  *
154  *   pg_hw_util		Running value of PG utilization for the sharing
155  *			  relationship
156  *
157  *   pg_hw_util_time_running
158  *			Total time spent collecting CU data. The time may be
159  *			less than wall time if CU counters were stopped for
160  *			some time.
161  *
162  *   pg_hw_util_time_stopped Total time the CU counters were stopped.
163  *
164  *   pg_hw_util_rate	Utilization rate, expressed in operations per second.
165  *
166  *   pg_hw_util_rate_max Maximum observed value of utilization rate.
167  */
/*
 * Scratch structure used as ks_data for every "pg_hw_perf" kstat. It is
 * refilled by pghw_cu_kstat_update() from the PG stored in ks_private. The
 * five hw_util* values are reported as 0 when the reader fails the
 * secpolicy_cpc_cpu() check.
 */
struct pghw_cu_kstat {
	kstat_named_t	pg_id;			/* PG id */
	kstat_named_t	pg_parent_id;		/* parent PG id, -1 if none */
	kstat_named_t	pg_ncpus;		/* number of CPUs in the PG */
	kstat_named_t	pg_generation;		/* copy of pghw_kstat_gen */
	kstat_named_t	pg_hw_util;		/* running utilization */
	kstat_named_t	pg_hw_util_time_running; /* time counters ran */
	kstat_named_t	pg_hw_util_time_stopped; /* time counters stopped */
	kstat_named_t	pg_hw_util_rate;	/* utilization rate */
	kstat_named_t	pg_hw_util_rate_max;	/* max observed rate */
	kstat_named_t	pg_cpus;		/* CPU list string, may be "" */
	kstat_named_t	pg_relationship;	/* sharing relationship name */
} pghw_cu_kstat = {
	{ "pg_id",		KSTAT_DATA_INT32 },
	{ "parent_pg_id",	KSTAT_DATA_INT32 },
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "generation",		KSTAT_DATA_UINT32   },
	{ "hw_util",		KSTAT_DATA_UINT64   },
	{ "hw_util_time_running",	KSTAT_DATA_UINT64   },
	{ "hw_util_time_stopped",	KSTAT_DATA_UINT64   },
	{ "hw_util_rate",	KSTAT_DATA_UINT64   },
	{ "hw_util_rate_max",	KSTAT_DATA_UINT64   },
	{ "cpus",		KSTAT_DATA_STRING   },
	{ "relationship",	KSTAT_DATA_STRING   },
};
193 
/*
 * Calculate the string size needed to represent ncpus CPU IDs. Allow 5 digits
 * for each CPU ID plus one separator per CPU; the slot reserved for the
 * separator after the last ID holds the terminating NUL byte. This is only
 * an estimate, since we try to compress CPU ranges as x-y. In the worst case
 * the string representation of CPUs may be truncated.
 */
#define	CPUSTR_LEN(ncpus) ((ncpus) * 6)
201 
/*
 * Maximum length of the string that represents list of CPUs.
 *
 * Zero until the first call to pghw_kstat_create(), which sets it to
 * CPUSTR_LEN(max_ncpus). That amount is added to each "pg_hw_perf" kstat's
 * ks_data_size, and pghw_cpulist_alloc() never allocates a longer string.
 */
static int pg_cpulist_maxlen = 0;
206 
207 static void		pghw_kstat_create(pghw_t *);
208 static int		pghw_kstat_update(kstat_t *, int);
209 static int		pghw_cu_kstat_update(kstat_t *, int);
210 static int		cpu2id(void *);
211 
212 /*
213  * hwset operations
214  */
215 static group_t		*pghw_set_create(pghw_type_t);
216 static void		pghw_set_add(group_t *, pghw_t *);
217 static void		pghw_set_remove(group_t *, pghw_t *);
218 
219 static void		pghw_cpulist_alloc(pghw_t *);
220 static int		cpu2id(void *);
221 static pgid_t		pghw_parent_id(pghw_t *);
222 
223 /*
224  * Initialize the physical portion of a hardware PG
225  */
226 void
pghw_init(pghw_t * pg,cpu_t * cp,pghw_type_t hw)227 pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw)
228 {
229 	group_t		*hwset;
230 
231 	if ((hwset = pghw_set_lookup(hw)) == NULL) {
232 		/*
233 		 * Haven't seen this hardware type yet
234 		 */
235 		hwset = pghw_set_create(hw);
236 	}
237 
238 	pghw_set_add(hwset, pg);
239 	pg->pghw_hw = hw;
240 	pg->pghw_generation = 0;
241 	pg->pghw_instance =
242 	    pg_plat_hw_instance_id(cp, hw);
243 	pghw_kstat_create(pg);
244 
245 	/*
246 	 * Hardware sharing relationship specific initialization
247 	 */
248 	switch (pg->pghw_hw) {
249 	case PGHW_POW_ACTIVE:
250 		pg->pghw_handle =
251 		    (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_ACTIVE);
252 		break;
253 	case PGHW_POW_IDLE:
254 		pg->pghw_handle =
255 		    (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_IDLE);
256 		break;
257 	default:
258 		pg->pghw_handle = (pghw_handle_t)NULL;
259 	}
260 }
261 
262 /*
263  * Teardown the physical portion of a physical PG
264  */
265 void
pghw_fini(pghw_t * pg)266 pghw_fini(pghw_t *pg)
267 {
268 	group_t		*hwset;
269 
270 	pghw_cmt_fini(pg);
271 
272 	hwset = pghw_set_lookup(pg->pghw_hw);
273 	ASSERT(hwset != NULL);
274 
275 	pghw_set_remove(hwset, pg);
276 	pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON;
277 	pg->pghw_hw = (pghw_type_t)-1;
278 
279 	if (pg->pghw_kstat != NULL)
280 		kstat_delete(pg->pghw_kstat);
281 
282 }
283 
284 /*
285  * PG is removed from CMT hierarchy
286  */
287 void
pghw_cmt_fini(pghw_t * pg)288 pghw_cmt_fini(pghw_t *pg)
289 {
290 	/*
291 	 * Destroy string representation of CPUs
292 	 */
293 	if (pg->pghw_cpulist != NULL) {
294 		kmem_free(pg->pghw_cpulist,
295 		    pg->pghw_cpulist_len);
296 		pg->pghw_cpulist = NULL;
297 	}
298 
299 	/*
300 	 * Destroy CU kstats
301 	 */
302 	if (pg->pghw_cu_kstat != NULL) {
303 		kstat_delete(pg->pghw_cu_kstat);
304 		pg->pghw_cu_kstat = NULL;
305 	}
306 }
307 
308 /*
309  * Find an existing physical PG in which to place
310  * the given CPU for the specified hardware sharing
311  * relationship
312  */
313 pghw_t *
pghw_place_cpu(cpu_t * cp,pghw_type_t hw)314 pghw_place_cpu(cpu_t *cp, pghw_type_t hw)
315 {
316 	group_t		*hwset;
317 
318 	if ((hwset = pghw_set_lookup(hw)) == NULL) {
319 		return (NULL);
320 	}
321 
322 	return ((pghw_t *)pg_cpu_find_pg(cp, hwset));
323 }
324 
325 /*
326  * Find the pg representing the hw sharing relationship in which
327  * cp belongs
328  */
329 pghw_t *
pghw_find_pg(cpu_t * cp,pghw_type_t hw)330 pghw_find_pg(cpu_t *cp, pghw_type_t hw)
331 {
332 	group_iter_t	i;
333 	pghw_t	*pg;
334 
335 	group_iter_init(&i);
336 	while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) {
337 		if (pg->pghw_hw == hw)
338 			return (pg);
339 	}
340 	return (NULL);
341 }
342 
343 /*
344  * Find the PG of the given hardware sharing relationship
345  * type with the given instance id
346  */
347 pghw_t *
pghw_find_by_instance(id_t id,pghw_type_t hw)348 pghw_find_by_instance(id_t id, pghw_type_t hw)
349 {
350 	group_iter_t	i;
351 	group_t		*set;
352 	pghw_t		*pg;
353 
354 	set = pghw_set_lookup(hw);
355 	if (!set)
356 		return (NULL);
357 
358 	group_iter_init(&i);
359 	while ((pg = group_iterate(set, &i)) != NULL) {
360 		if (pg->pghw_instance == id)
361 			return (pg);
362 	}
363 	return (NULL);
364 }
365 
366 /*
367  * CPUs physical ID cache creation / destruction
368  * The cache's elements are initialized to the CPU's id
369  */
370 void
pghw_physid_create(cpu_t * cp)371 pghw_physid_create(cpu_t *cp)
372 {
373 	int	i;
374 
375 	cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP);
376 
377 	for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) {
378 		((id_t *)cp->cpu_physid)[i] = cp->cpu_id;
379 	}
380 }
381 
382 void
pghw_physid_destroy(cpu_t * cp)383 pghw_physid_destroy(cpu_t *cp)
384 {
385 	if (cp->cpu_physid) {
386 		kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
387 		cp->cpu_physid = NULL;
388 	}
389 }
390 
391 /*
392  * Create a new, empty hwset.
393  * This routine may block, and must not be called from any
394  * paused CPU context.
395  */
396 static group_t	*
pghw_set_create(pghw_type_t hw)397 pghw_set_create(pghw_type_t hw)
398 {
399 	group_t	*g;
400 	int	ret;
401 
402 	/*
403 	 * Create the top level PG hw group if it doesn't already exist
404 	 * This is a "set" of hardware sets, that is ordered (and indexed)
405 	 * by the pghw_type_t enum.
406 	 */
407 	if (pg_hw == NULL) {
408 		pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP);
409 		group_create(pg_hw);
410 		group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS);
411 	}
412 
413 	/*
414 	 * Create the new hwset
415 	 * Add it to the top level pg_hw group.
416 	 */
417 	g = kmem_alloc(sizeof (group_t), KM_SLEEP);
418 	group_create(g);
419 
420 	ret = group_add_at(pg_hw, g, (uint_t)hw);
421 	ASSERT(ret == 0);
422 
423 	return (g);
424 }
425 
426 /*
427  * Find the hwset associated with the given hardware sharing type
428  */
429 group_t *
pghw_set_lookup(pghw_type_t hw)430 pghw_set_lookup(pghw_type_t hw)
431 {
432 	group_t	*hwset;
433 
434 	if (pg_hw == NULL)
435 		return (NULL);
436 
437 	hwset = GROUP_ACCESS(pg_hw, (uint_t)hw);
438 	return (hwset);
439 }
440 
441 /*
442  * Add a PG to a hwset
443  */
444 static void
pghw_set_add(group_t * hwset,pghw_t * pg)445 pghw_set_add(group_t *hwset, pghw_t *pg)
446 {
447 	(void) group_add(hwset, pg, GRP_RESIZE);
448 }
449 
450 /*
451  * Remove a PG from a hwset
452  */
453 static void
pghw_set_remove(group_t * hwset,pghw_t * pg)454 pghw_set_remove(group_t *hwset, pghw_t *pg)
455 {
456 	int result;
457 
458 	result = group_remove(hwset, pg, GRP_RESIZE);
459 	ASSERT(result == 0);
460 }
461 
462 /*
463  * Return a string name given a pg_hw sharing type
464  */
465 char *
pghw_type_string(pghw_type_t hw)466 pghw_type_string(pghw_type_t hw)
467 {
468 	switch (hw) {
469 	case PGHW_IPIPE:
470 		return ("Integer Pipeline");
471 	case PGHW_CACHE:
472 		return ("Cache");
473 	case PGHW_FPU:
474 		return ("Floating Point Unit");
475 	case PGHW_MPIPE:
476 		return ("Data Pipe to memory");
477 	case PGHW_CHIP:
478 		return ("Socket");
479 	case PGHW_MEMORY:
480 		return ("Memory");
481 	case PGHW_POW_ACTIVE:
482 		return ("CPU PM Active Power Domain");
483 	case PGHW_POW_IDLE:
484 		return ("CPU PM Idle Power Domain");
485 	default:
486 		return ("unknown");
487 	}
488 }
489 
490 /*
491  * Create / Update routines for PG hw kstats
492  *
493  * It is the intention of these kstats to provide some level
494  * of informational / debugging observability into the types
495  * and nature of the system's detected hardware sharing relationships
496  */
497 void
pghw_kstat_create(pghw_t * pg)498 pghw_kstat_create(pghw_t *pg)
499 {
500 	char *sharing = pghw_type_string(pg->pghw_hw);
501 	char name[KSTAT_STRLEN + 1];
502 
503 	/*
504 	 * Canonify PG name to conform to kstat name rules
505 	 */
506 	(void) strncpy(name, pghw_type_string(pg->pghw_hw), KSTAT_STRLEN + 1);
507 	strident_canon(name, KSTAT_STRLEN + 1);
508 
509 	/*
510 	 * Create a hardware performance kstat
511 	 */
512 	if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id,
513 	    "pg", "pg",
514 	    KSTAT_TYPE_NAMED,
515 	    sizeof (pghw_kstat) / sizeof (kstat_named_t),
516 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
517 		/* Class string, hw string, and policy string */
518 		pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX;
519 		pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
520 		pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
521 		pg->pghw_kstat->ks_lock = &pghw_kstat_lock;
522 		pg->pghw_kstat->ks_data = &pghw_kstat;
523 		pg->pghw_kstat->ks_update = pghw_kstat_update;
524 		pg->pghw_kstat->ks_private = pg;
525 		kstat_install(pg->pghw_kstat);
526 	}
527 
528 	if (pg_cpulist_maxlen == 0)
529 		pg_cpulist_maxlen = CPUSTR_LEN(max_ncpus);
530 
531 	/*
532 	 * Create a physical pg kstat
533 	 */
534 	if ((pg->pghw_cu_kstat = kstat_create("pg_hw_perf", ((pg_t *)pg)->pg_id,
535 	    name, "processor_group",
536 	    KSTAT_TYPE_NAMED,
537 	    sizeof (pghw_cu_kstat) / sizeof (kstat_named_t),
538 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
539 		pg->pghw_cu_kstat->ks_lock = &pghw_kstat_lock;
540 		pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat;
541 		pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update;
542 		pg->pghw_cu_kstat->ks_private = pg;
543 		pg->pghw_cu_kstat->ks_data_size += strlen(sharing) + 1;
544 		/* Allow space for CPU strings */
545 		pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
546 		pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen;
547 		kstat_install(pg->pghw_cu_kstat);
548 	}
549 }
550 
551 int
pghw_kstat_update(kstat_t * ksp,int rw)552 pghw_kstat_update(kstat_t *ksp, int rw)
553 {
554 	struct pghw_kstat	*pgsp = &pghw_kstat;
555 	pghw_t			*pg = ksp->ks_private;
556 
557 	if (rw == KSTAT_WRITE)
558 		return (EACCES);
559 
560 	pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id;
561 	pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
562 	pgsp->pg_instance_id.value.ui32 = pg->pghw_instance;
563 	kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name);
564 	kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw));
565 	kstat_named_setstr(&pgsp->pg_policy, pg_policy_name((pg_t *)pg));
566 	return (0);
567 }
568 
569 int
pghw_cu_kstat_update(kstat_t * ksp,int rw)570 pghw_cu_kstat_update(kstat_t *ksp, int rw)
571 {
572 	struct pghw_cu_kstat	*pgsp = &pghw_cu_kstat;
573 	pghw_t			*pg = ksp->ks_private;
574 	pghw_util_t		*hw_util = &pg->pghw_stats;
575 	boolean_t		has_cpc_privilege;
576 
577 	if (rw == KSTAT_WRITE)
578 		return (EACCES);
579 
580 	/*
581 	 * Check whether the caller has priv_cpc_cpu privilege. If it doesn't,
582 	 * it will not get hardware utilization data.
583 	 */
584 
585 	has_cpc_privilege = (secpolicy_cpc_cpu(crgetcred()) == 0);
586 
587 	pgsp->pg_id.value.i32 = ((pg_t *)pg)->pg_id;
588 	pgsp->pg_parent_id.value.i32 = (int)pghw_parent_id(pg);
589 
590 	pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
591 
592 	/*
593 	 * Allocate memory for the string representing the list of CPUs in PG.
594 	 * This memory should persist past the call to pghw_cu_kstat_update()
595 	 * since the kstat snapshot routine will reference this memory.
596 	 */
597 	pghw_cpulist_alloc(pg);
598 
599 	if (pg->pghw_kstat_gen != pg->pghw_generation) {
600 		/*
601 		 * PG kstat generation number is out of sync with PG's
602 		 * generation mumber. It means that some CPUs could have joined
603 		 * or left PG and it is not possible to compare the numbers
604 		 * obtained before and after the generation change.
605 		 *
606 		 * Reset the maximum utilization rate and start computing it
607 		 * from scratch.
608 		 */
609 		hw_util->pghw_util = 0;
610 		hw_util->pghw_rate_max = 0;
611 		pg->pghw_kstat_gen = pg->pghw_generation;
612 	}
613 
614 	/*
615 	 * We can't block on CPU lock because when PG is destroyed (under
616 	 * cpu_lock) it tries to delete this kstat and it will wait for us to
617 	 * complete which will never happen since we are waiting for cpu_lock to
618 	 * drop. Deadlocks are fun!
619 	 */
620 	if (mutex_tryenter(&cpu_lock)) {
621 		if (pg->pghw_cpulist != NULL &&
622 		    *(pg->pghw_cpulist) == '\0') {
623 			(void) group2intlist(&(((pg_t *)pg)->pg_cpus),
624 			    pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id);
625 		}
626 
627 		if (has_cpc_privilege)
628 			cu_pg_update(pg);
629 
630 		mutex_exit(&cpu_lock);
631 	}
632 
633 	pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen;
634 	if (pg->pghw_cpulist != NULL)
635 		kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist);
636 	else
637 		kstat_named_setstr(&pgsp->pg_cpus, "");
638 
639 	kstat_named_setstr(&pgsp->pg_relationship,
640 	    pghw_type_string(pg->pghw_hw));
641 
642 	if (has_cpc_privilege) {
643 		pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util;
644 		pgsp->pg_hw_util_time_running.value.ui64 =
645 		    hw_util->pghw_time_running;
646 		pgsp->pg_hw_util_time_stopped.value.ui64 =
647 		    hw_util->pghw_time_stopped;
648 		pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate;
649 		pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max;
650 	} else {
651 		pgsp->pg_hw_util.value.ui64 = 0;
652 		pgsp->pg_hw_util_time_running.value.ui64 = 0;
653 		pgsp->pg_hw_util_time_stopped.value.ui64 = 0;
654 		pgsp->pg_hw_util_rate.value.ui64 = 0;
655 		pgsp->pg_hw_util_rate_max.value.ui64 = 0;
656 	}
657 
658 	return (0);
659 }
660 
661 /*
662  * Update the string representation of CPUs in PG (pg->pghw_cpulist).
663  * The string representation is used for kstats.
664  *
665  * The string is allocated if it has not already been or if it is already
666  * allocated and PG has more CPUs now. If PG has smaller or equal number of
667  * CPUs, but the actual CPUs may have changed, the string is reset to the empty
668  * string causes the string representation to be recreated. The pghw_generation
669  * field is used to detect whether CPUs within the pg may have changed.
670  */
671 static void
pghw_cpulist_alloc(pghw_t * pg)672 pghw_cpulist_alloc(pghw_t *pg)
673 {
674 	uint_t	ncpus = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
675 	size_t	len = CPUSTR_LEN(ncpus);
676 
677 	/*
678 	 * If the pghw_cpulist string is already allocated we need to make sure
679 	 * that it has sufficient length. Also if the set of CPUs may have
680 	 * changed, we need to re-generate the string.
681 	 */
682 	if (pg->pghw_cpulist != NULL &&
683 	    pg->pghw_kstat_gen != pg->pghw_generation) {
684 		if (len <= pg->pghw_cpulist_len) {
685 			/*
686 			 * There is sufficient space in the pghw_cpulist for
687 			 * the new set of CPUs. Just clear the string to trigger
688 			 * re-generation of list of CPUs
689 			 */
690 			*(pg->pghw_cpulist) = '\0';
691 		} else {
692 			/*
693 			 * There is, potentially, insufficient space in
694 			 * pghw_cpulist, so reallocate the string.
695 			 */
696 			ASSERT(strlen(pg->pghw_cpulist) < pg->pghw_cpulist_len);
697 			kmem_free(pg->pghw_cpulist, pg->pghw_cpulist_len);
698 			pg->pghw_cpulist = NULL;
699 			pg->pghw_cpulist_len = 0;
700 		}
701 	}
702 
703 	if (pg->pghw_cpulist == NULL) {
704 		/*
705 		 * Allocate space to hold cpulist.
706 		 *
707 		 * Length can not be bigger that the maximum space we have
708 		 * allowed for the kstat buffer
709 		 */
710 		if (len > pg_cpulist_maxlen)
711 			len = pg_cpulist_maxlen;
712 		if (len > 0) {
713 			pg->pghw_cpulist = kmem_zalloc(len, KM_NOSLEEP);
714 			if (pg->pghw_cpulist != NULL)
715 				pg->pghw_cpulist_len = len;
716 		}
717 	}
718 }
719 
720 static int
cpu2id(void * v)721 cpu2id(void *v)
722 {
723 	cpu_t *cp = (cpu_t *)v;
724 
725 	ASSERT(v != NULL);
726 
727 	return (cp->cpu_id);
728 }
729 
730 /*
731  * Return parent ID or -1 if there is no parent.
732  * All hardware PGs are currently also CMT PGs, but for safety we check the
733  * class matches cmt before we upcast the pghw pointer to pg_cmt_t.
734  */
735 static pgid_t
pghw_parent_id(pghw_t * pghw)736 pghw_parent_id(pghw_t *pghw)
737 {
738 	pg_t *pg = (pg_t *)pghw;
739 	pgid_t parent_id = -1;
740 
741 	if (pg != NULL && strcmp(pg->pg_class->pgc_name, "cmt") == 0) {
742 		pg_cmt_t *cmt = (pg_cmt_t *)pg;
743 		pg_t *parent = (pg_t *)cmt->cmt_parent;
744 		if (parent != NULL)
745 			parent_id = parent->pg_id;
746 	}
747 
748 	return (parent_id);
749 }
750