1fb2f18f8Sesaxe /* 2fb2f18f8Sesaxe * CDDL HEADER START 3fb2f18f8Sesaxe * 4fb2f18f8Sesaxe * The contents of this file are subject to the terms of the 5fb2f18f8Sesaxe * Common Development and Distribution License (the "License"). 6fb2f18f8Sesaxe * You may not use this file except in compliance with the License. 7fb2f18f8Sesaxe * 8fb2f18f8Sesaxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fb2f18f8Sesaxe * or http://www.opensolaris.org/os/licensing. 10fb2f18f8Sesaxe * See the License for the specific language governing permissions 11fb2f18f8Sesaxe * and limitations under the License. 12fb2f18f8Sesaxe * 13fb2f18f8Sesaxe * When distributing Covered Code, include this CDDL HEADER in each 14fb2f18f8Sesaxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fb2f18f8Sesaxe * If applicable, add the following below this CDDL HEADER, with the 16fb2f18f8Sesaxe * fields enclosed by brackets "[]" replaced with your own identifying 17fb2f18f8Sesaxe * information: Portions Copyright [yyyy] [name of copyright owner] 18fb2f18f8Sesaxe * 19fb2f18f8Sesaxe * CDDL HEADER END 20fb2f18f8Sesaxe */ 21fb2f18f8Sesaxe /* 220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23fb2f18f8Sesaxe * Use is subject to license terms. 24fb2f18f8Sesaxe */ 25fb2f18f8Sesaxe 26fb2f18f8Sesaxe #include <sys/systm.h> 27fb2f18f8Sesaxe #include <sys/types.h> 28fb2f18f8Sesaxe #include <sys/param.h> 29fb2f18f8Sesaxe #include <sys/thread.h> 30fb2f18f8Sesaxe #include <sys/cpuvar.h> 31fb2f18f8Sesaxe #include <sys/kmem.h> 32fb2f18f8Sesaxe #include <sys/cmn_err.h> 33fb2f18f8Sesaxe #include <sys/group.h> 34fb2f18f8Sesaxe #include <sys/pg.h> 35fb2f18f8Sesaxe #include <sys/pghw.h> 360e751525SEric Saxe #include <sys/cpu_pm.h> 37*b885580bSAlexander Kolbasov #include <sys/cap_util.h> 38fb2f18f8Sesaxe 39fb2f18f8Sesaxe /* 40fb2f18f8Sesaxe * Processor Groups: Hardware sharing relationship layer 41fb2f18f8Sesaxe * 42fb2f18f8Sesaxe * This file implements an extension to Processor Groups to capture 43fb2f18f8Sesaxe * hardware sharing relationships existing between logical CPUs. Examples of 44fb2f18f8Sesaxe * hardware sharing relationships include shared caches on some CMT 45fb2f18f8Sesaxe * procesoor architectures, or shared local memory controllers on NUMA 46fb2f18f8Sesaxe * based system architectures. 47fb2f18f8Sesaxe * 48fb2f18f8Sesaxe * The pghw_t structure represents the extended PG. The first member 49fb2f18f8Sesaxe * of the structure is the generic pg_t with the pghw specific members 50fb2f18f8Sesaxe * following. The generic pg_t *must* remain the first member of the 51fb2f18f8Sesaxe * structure as the code uses casting of structure references to access 52fb2f18f8Sesaxe * the generic pg_t structure elements. 53fb2f18f8Sesaxe * 54fb2f18f8Sesaxe * In addition to the generic CPU grouping, physical PGs have a hardware 55fb2f18f8Sesaxe * sharing relationship enumerated "type", and an instance id. The enumerated 56fb2f18f8Sesaxe * type is defined by the pghw_type_t enumeration, while the instance id 57fb2f18f8Sesaxe * uniquely identifies the sharing instance from among others of the same 58fb2f18f8Sesaxe * hardware sharing type. 59fb2f18f8Sesaxe * 60fb2f18f8Sesaxe * The physical PGs are organized into an overall hierarchy, and are tracked 61fb2f18f8Sesaxe * in a number of different per CPU, and per pghw_type_t type groups. 62fb2f18f8Sesaxe * As an example: 63fb2f18f8Sesaxe * 64fb2f18f8Sesaxe * ------------- 65fb2f18f8Sesaxe * | pg_hw | 66fb2f18f8Sesaxe * | (group_t) | 67fb2f18f8Sesaxe * ------------- 68fb2f18f8Sesaxe * || ============================ 69fb2f18f8Sesaxe * ||\\-----------------------// \\ \\ 70fb2f18f8Sesaxe * || | hwset (PGC_HW_CHIP) | ------------- ------------- 71fb2f18f8Sesaxe * || | (group_t) | | pghw_t | | pghw_t | 72fb2f18f8Sesaxe * || ----------------------- | chip 0 | | chip 1 | 73fb2f18f8Sesaxe * || ------------- ------------- 74fb2f18f8Sesaxe * || \\ \\ \\ \\ \\ \\ \\ \\ 75fb2f18f8Sesaxe * || cpu cpu cpu cpu cpu cpu cpu cpu 76fb2f18f8Sesaxe * || 77fb2f18f8Sesaxe * || ============================ 78fb2f18f8Sesaxe * ||\\-----------------------// \\ \\ 79fb2f18f8Sesaxe * || | hwset (PGC_HW_IPIPE)| ------------- ------------- 80fb2f18f8Sesaxe * || | (group_t) | | pghw_t | | pghw_t | 81fb2f18f8Sesaxe * || ----------------------- | ipipe 0 | | ipipe 1 | 82fb2f18f8Sesaxe * || ------------- ------------- 83fb2f18f8Sesaxe * || \\ \\ \\ \\ 84fb2f18f8Sesaxe * || cpu cpu cpu cpu 85fb2f18f8Sesaxe * ... 86fb2f18f8Sesaxe * 87fb2f18f8Sesaxe * 88fb2f18f8Sesaxe * The top level pg_hw is a group of "hwset" groups. Each hwset holds of group 89fb2f18f8Sesaxe * of physical PGs of the same hardware sharing type. Within each hwset, the 90fb2f18f8Sesaxe * PG's instance id uniquely identifies the grouping relationshsip among other 91fb2f18f8Sesaxe * groupings of the same sharing type. The instance id for a grouping is 92fb2f18f8Sesaxe * platform defined, and in some cases may be used by platform code as a handle 93fb2f18f8Sesaxe * to search for a particular relationship instance. 94fb2f18f8Sesaxe * 95fb2f18f8Sesaxe * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs 96fb2f18f8Sesaxe * that participate in the sharing relationship. Each CPU also has associated 97fb2f18f8Sesaxe * with it a grouping tracking the PGs in which the CPU belongs. This can be 98fb2f18f8Sesaxe * used to iterate over the various relationships in which the CPU participates 99fb2f18f8Sesaxe * (the CPU's chip, cache, lgroup, etc.). 100fb2f18f8Sesaxe * 101fb2f18f8Sesaxe * The hwsets are created dynamically as new hardware sharing relationship types 1020e751525SEric Saxe * are instantiated. They are never destroyed, as once a given relationship 103fb2f18f8Sesaxe * type appears in the system, it is quite likely that at least one instance of 104fb2f18f8Sesaxe * that relationship will always persist as long as the system is running. 105fb2f18f8Sesaxe */ 106fb2f18f8Sesaxe 107fb2f18f8Sesaxe static group_t *pg_hw; /* top level pg hw group */ 108fb2f18f8Sesaxe 109fb2f18f8Sesaxe /* 110fb2f18f8Sesaxe * Physical PG kstats 111fb2f18f8Sesaxe */ 112fb2f18f8Sesaxe struct pghw_kstat { 113fb2f18f8Sesaxe kstat_named_t pg_id; 114fb2f18f8Sesaxe kstat_named_t pg_class; 115fb2f18f8Sesaxe kstat_named_t pg_ncpus; 116fb2f18f8Sesaxe kstat_named_t pg_instance_id; 117fb2f18f8Sesaxe kstat_named_t pg_hw; 1180e751525SEric Saxe kstat_named_t pg_policy; 119fb2f18f8Sesaxe } pghw_kstat = { 120*b885580bSAlexander Kolbasov { "id", KSTAT_DATA_UINT32 }, 121fb2f18f8Sesaxe { "pg_class", KSTAT_DATA_STRING }, 122*b885580bSAlexander Kolbasov { "ncpus", KSTAT_DATA_UINT32 }, 123*b885580bSAlexander Kolbasov { "instance_id", KSTAT_DATA_UINT32 }, 124fb2f18f8Sesaxe { "hardware", KSTAT_DATA_STRING }, 1250e751525SEric Saxe { "policy", KSTAT_DATA_STRING }, 126fb2f18f8Sesaxe }; 127fb2f18f8Sesaxe 128fb2f18f8Sesaxe kmutex_t pghw_kstat_lock; 129fb2f18f8Sesaxe 130fb2f18f8Sesaxe /* 131*b885580bSAlexander Kolbasov * Capacity and Utilization PG kstats 132*b885580bSAlexander Kolbasov * 133*b885580bSAlexander Kolbasov * These kstats are updated one at a time, so we can have a single scratch space 134*b885580bSAlexander Kolbasov * to fill the data. 135*b885580bSAlexander Kolbasov * 136*b885580bSAlexander Kolbasov * kstat fields: 137*b885580bSAlexander Kolbasov * 138*b885580bSAlexander Kolbasov * pgid PG ID for PG described by this kstat 139*b885580bSAlexander Kolbasov * 140*b885580bSAlexander Kolbasov * pg_ncpus Number of CPUs within this PG 141*b885580bSAlexander Kolbasov * 142*b885580bSAlexander Kolbasov * pg_cpus String describing CPUs within this PG 143*b885580bSAlexander Kolbasov * 144*b885580bSAlexander Kolbasov * pg_sharing Name of sharing relationship for this PG 145*b885580bSAlexander Kolbasov * 146*b885580bSAlexander Kolbasov * pg_generation Generation value that increases whenever any CPU leaves 147*b885580bSAlexander Kolbasov * or joins PG. Two kstat snapshots for the same 148*b885580bSAlexander Kolbasov * CPU may only be compared if they have the same 149*b885580bSAlexander Kolbasov * generation 150*b885580bSAlexander Kolbasov * 151*b885580bSAlexander Kolbasov * pg_hw_util Running value of PG utilization for the sharing 152*b885580bSAlexander Kolbasov * relationship 153*b885580bSAlexander Kolbasov * 154*b885580bSAlexander Kolbasov * pg_hw_util_time_running 155*b885580bSAlexander Kolbasov * Total time spent collecting CU data. The time may be 156*b885580bSAlexander Kolbasov * less than wall time if CU counters were stopped for 157*b885580bSAlexander Kolbasov * some time. 158*b885580bSAlexander Kolbasov * 159*b885580bSAlexander Kolbasov * pg_hw_util_time_stopped Total time the CU counters were stopped. 160*b885580bSAlexander Kolbasov * 161*b885580bSAlexander Kolbasov * pg_hw_util_rate Utilization rate, expressed in operations per second. 162*b885580bSAlexander Kolbasov * 163*b885580bSAlexander Kolbasov * pg_hw_util_rate_max Maximum observed value of utilization rate. 164*b885580bSAlexander Kolbasov */ 165*b885580bSAlexander Kolbasov struct pghw_cu_kstat { 166*b885580bSAlexander Kolbasov kstat_named_t pg_id; 167*b885580bSAlexander Kolbasov kstat_named_t pg_ncpus; 168*b885580bSAlexander Kolbasov kstat_named_t pg_generation; 169*b885580bSAlexander Kolbasov kstat_named_t pg_hw_util; 170*b885580bSAlexander Kolbasov kstat_named_t pg_hw_util_time_running; 171*b885580bSAlexander Kolbasov kstat_named_t pg_hw_util_time_stopped; 172*b885580bSAlexander Kolbasov kstat_named_t pg_hw_util_rate; 173*b885580bSAlexander Kolbasov kstat_named_t pg_hw_util_rate_max; 174*b885580bSAlexander Kolbasov kstat_named_t pg_cpus; 175*b885580bSAlexander Kolbasov kstat_named_t pg_sharing; 176*b885580bSAlexander Kolbasov } pghw_cu_kstat = { 177*b885580bSAlexander Kolbasov { "id", KSTAT_DATA_UINT32 }, 178*b885580bSAlexander Kolbasov { "ncpus", KSTAT_DATA_UINT32 }, 179*b885580bSAlexander Kolbasov { "generation", KSTAT_DATA_UINT32 }, 180*b885580bSAlexander Kolbasov { "hw_util", KSTAT_DATA_UINT64 }, 181*b885580bSAlexander Kolbasov { "hw_util_time_running", KSTAT_DATA_UINT64 }, 182*b885580bSAlexander Kolbasov { "hw_util_time_stopped", KSTAT_DATA_UINT64 }, 183*b885580bSAlexander Kolbasov { "hw_util_rate", KSTAT_DATA_UINT64 }, 184*b885580bSAlexander Kolbasov { "hw_util_rate_max", KSTAT_DATA_UINT64 }, 185*b885580bSAlexander Kolbasov { "cpus", KSTAT_DATA_STRING }, 186*b885580bSAlexander Kolbasov { "sharing_relation", KSTAT_DATA_STRING }, 187*b885580bSAlexander Kolbasov }; 188*b885580bSAlexander Kolbasov 189*b885580bSAlexander Kolbasov /* 190*b885580bSAlexander Kolbasov * Calculate the string size to represent NCPUS. Allow 5 digits for each CPU ID 191*b885580bSAlexander Kolbasov * plus one space per CPU plus NUL byte in the end. This is only an estimate, 192*b885580bSAlexander Kolbasov * since we try to compress CPU ranges as x-y. In the worst case the string 193*b885580bSAlexander Kolbasov * representation of CPUs may be truncated. 194*b885580bSAlexander Kolbasov */ 195*b885580bSAlexander Kolbasov #define CPUSTR_LEN(ncpus) ((ncpus) * 6) 196*b885580bSAlexander Kolbasov 197*b885580bSAlexander Kolbasov /* 198*b885580bSAlexander Kolbasov * Maximum length of the string that represents list of CPUs 199*b885580bSAlexander Kolbasov */ 200*b885580bSAlexander Kolbasov static int pg_cpulist_maxlen = 0; 201*b885580bSAlexander Kolbasov 202*b885580bSAlexander Kolbasov static void pghw_kstat_create(pghw_t *); 203*b885580bSAlexander Kolbasov static int pghw_kstat_update(kstat_t *, int); 204*b885580bSAlexander Kolbasov static int pghw_cu_kstat_update(kstat_t *, int); 205*b885580bSAlexander Kolbasov static int cpu2id(void *); 206*b885580bSAlexander Kolbasov 207*b885580bSAlexander Kolbasov /* 208fb2f18f8Sesaxe * hwset operations 209fb2f18f8Sesaxe */ 210fb2f18f8Sesaxe static group_t *pghw_set_create(pghw_type_t); 211fb2f18f8Sesaxe static void pghw_set_add(group_t *, pghw_t *); 212fb2f18f8Sesaxe static void pghw_set_remove(group_t *, pghw_t *); 213fb2f18f8Sesaxe 214*b885580bSAlexander Kolbasov static void pghw_cpulist_alloc(pghw_t *); 215*b885580bSAlexander Kolbasov static int cpu2id(void *); 216*b885580bSAlexander Kolbasov 217fb2f18f8Sesaxe /* 2180e751525SEric Saxe * Initialize the physical portion of a hardware PG 219fb2f18f8Sesaxe */ 220fb2f18f8Sesaxe void 221fb2f18f8Sesaxe pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw) 222fb2f18f8Sesaxe { 223fb2f18f8Sesaxe group_t *hwset; 224fb2f18f8Sesaxe 225fb2f18f8Sesaxe if ((hwset = pghw_set_lookup(hw)) == NULL) { 226fb2f18f8Sesaxe /* 227fb2f18f8Sesaxe * Haven't seen this hardware type yet 228fb2f18f8Sesaxe */ 229fb2f18f8Sesaxe hwset = pghw_set_create(hw); 230fb2f18f8Sesaxe } 231fb2f18f8Sesaxe 232fb2f18f8Sesaxe pghw_set_add(hwset, pg); 233fb2f18f8Sesaxe pg->pghw_hw = hw; 234*b885580bSAlexander Kolbasov pg->pghw_generation = 0; 235fb2f18f8Sesaxe pg->pghw_instance = 236fb2f18f8Sesaxe pg_plat_hw_instance_id(cp, hw); 237fb2f18f8Sesaxe pghw_kstat_create(pg); 2380e751525SEric Saxe 2390e751525SEric Saxe /* 2400e751525SEric Saxe * Hardware sharing relationship specific initialization 2410e751525SEric Saxe */ 2420e751525SEric Saxe switch (pg->pghw_hw) { 2430e751525SEric Saxe case PGHW_POW_ACTIVE: 2440e751525SEric Saxe pg->pghw_handle = 2450e751525SEric Saxe (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_ACTIVE); 2460e751525SEric Saxe break; 2470e751525SEric Saxe case PGHW_POW_IDLE: 2480e751525SEric Saxe pg->pghw_handle = 2490e751525SEric Saxe (pghw_handle_t)cpupm_domain_init(cp, CPUPM_DTYPE_IDLE); 2500e751525SEric Saxe break; 2510e751525SEric Saxe default: 2520e751525SEric Saxe pg->pghw_handle = (pghw_handle_t)NULL; 2530e751525SEric Saxe } 254fb2f18f8Sesaxe } 255fb2f18f8Sesaxe 256fb2f18f8Sesaxe /* 257fb2f18f8Sesaxe * Teardown the physical portion of a physical PG 258fb2f18f8Sesaxe */ 259fb2f18f8Sesaxe void 260fb2f18f8Sesaxe pghw_fini(pghw_t *pg) 261fb2f18f8Sesaxe { 262fb2f18f8Sesaxe group_t *hwset; 263fb2f18f8Sesaxe 264fb2f18f8Sesaxe hwset = pghw_set_lookup(pg->pghw_hw); 265fb2f18f8Sesaxe ASSERT(hwset != NULL); 266fb2f18f8Sesaxe 267fb2f18f8Sesaxe pghw_set_remove(hwset, pg); 268fb2f18f8Sesaxe pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON; 269fb2f18f8Sesaxe pg->pghw_hw = (pghw_type_t)-1; 270fb2f18f8Sesaxe 271*b885580bSAlexander Kolbasov if (pg->pghw_kstat != NULL) 272fb2f18f8Sesaxe kstat_delete(pg->pghw_kstat); 273*b885580bSAlexander Kolbasov 274*b885580bSAlexander Kolbasov /* 275*b885580bSAlexander Kolbasov * Destroy string representation of CPUs 276*b885580bSAlexander Kolbasov */ 277*b885580bSAlexander Kolbasov if (pg->pghw_cpulist != NULL) { 278*b885580bSAlexander Kolbasov kmem_free(pg->pghw_cpulist, 279*b885580bSAlexander Kolbasov pg->pghw_cpulist_len); 280*b885580bSAlexander Kolbasov pg->pghw_cpulist = NULL; 281*b885580bSAlexander Kolbasov } 282*b885580bSAlexander Kolbasov 283*b885580bSAlexander Kolbasov if (pg->pghw_cu_kstat != NULL) 284*b885580bSAlexander Kolbasov kstat_delete(pg->pghw_cu_kstat); 285fb2f18f8Sesaxe } 286fb2f18f8Sesaxe 287fb2f18f8Sesaxe /* 288fb2f18f8Sesaxe * Find an existing physical PG in which to place 289fb2f18f8Sesaxe * the given CPU for the specified hardware sharing 290fb2f18f8Sesaxe * relationship 291fb2f18f8Sesaxe */ 292fb2f18f8Sesaxe pghw_t * 293fb2f18f8Sesaxe pghw_place_cpu(cpu_t *cp, pghw_type_t hw) 294fb2f18f8Sesaxe { 295fb2f18f8Sesaxe group_t *hwset; 296fb2f18f8Sesaxe 297fb2f18f8Sesaxe if ((hwset = pghw_set_lookup(hw)) == NULL) { 298fb2f18f8Sesaxe return (NULL); 299fb2f18f8Sesaxe } 300fb2f18f8Sesaxe 301fb2f18f8Sesaxe return ((pghw_t *)pg_cpu_find_pg(cp, hwset)); 302fb2f18f8Sesaxe } 303fb2f18f8Sesaxe 304fb2f18f8Sesaxe /* 305fb2f18f8Sesaxe * Find the pg representing the hw sharing relationship in which 306fb2f18f8Sesaxe * cp belongs 307fb2f18f8Sesaxe */ 308fb2f18f8Sesaxe pghw_t * 309fb2f18f8Sesaxe pghw_find_pg(cpu_t *cp, pghw_type_t hw) 310fb2f18f8Sesaxe { 311fb2f18f8Sesaxe group_iter_t i; 312fb2f18f8Sesaxe pghw_t *pg; 313fb2f18f8Sesaxe 314fb2f18f8Sesaxe group_iter_init(&i); 315fb2f18f8Sesaxe while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) { 316fb2f18f8Sesaxe if (pg->pghw_hw == hw) 317fb2f18f8Sesaxe return (pg); 318fb2f18f8Sesaxe } 319fb2f18f8Sesaxe return (NULL); 320fb2f18f8Sesaxe } 321fb2f18f8Sesaxe 322fb2f18f8Sesaxe /* 323fb2f18f8Sesaxe * Find the PG of the given hardware sharing relationship 324fb2f18f8Sesaxe * type with the given instance id 325fb2f18f8Sesaxe */ 326fb2f18f8Sesaxe pghw_t * 327fb2f18f8Sesaxe pghw_find_by_instance(id_t id, pghw_type_t hw) 328fb2f18f8Sesaxe { 329fb2f18f8Sesaxe group_iter_t i; 330fb2f18f8Sesaxe group_t *set; 331fb2f18f8Sesaxe pghw_t *pg; 332fb2f18f8Sesaxe 333fb2f18f8Sesaxe set = pghw_set_lookup(hw); 334fb2f18f8Sesaxe if (!set) 335fb2f18f8Sesaxe return (NULL); 336fb2f18f8Sesaxe 337fb2f18f8Sesaxe group_iter_init(&i); 338fb2f18f8Sesaxe while ((pg = group_iterate(set, &i)) != NULL) { 339fb2f18f8Sesaxe if (pg->pghw_instance == id) 340fb2f18f8Sesaxe return (pg); 341fb2f18f8Sesaxe } 342fb2f18f8Sesaxe return (NULL); 343fb2f18f8Sesaxe } 344fb2f18f8Sesaxe 345fb2f18f8Sesaxe /* 346fb2f18f8Sesaxe * CPUs physical ID cache creation / destruction 347fb2f18f8Sesaxe * The cache's elements are initialized to the CPU's id 348fb2f18f8Sesaxe */ 349fb2f18f8Sesaxe void 350fb2f18f8Sesaxe pghw_physid_create(cpu_t *cp) 351fb2f18f8Sesaxe { 352fb2f18f8Sesaxe int i; 353fb2f18f8Sesaxe 354fb2f18f8Sesaxe cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP); 355fb2f18f8Sesaxe 356fb2f18f8Sesaxe for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) { 357fb2f18f8Sesaxe ((id_t *)cp->cpu_physid)[i] = cp->cpu_id; 358fb2f18f8Sesaxe } 359fb2f18f8Sesaxe } 360fb2f18f8Sesaxe 361fb2f18f8Sesaxe void 362fb2f18f8Sesaxe pghw_physid_destroy(cpu_t *cp) 363fb2f18f8Sesaxe { 364fb2f18f8Sesaxe if (cp->cpu_physid) { 365fb2f18f8Sesaxe kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); 366fb2f18f8Sesaxe cp->cpu_physid = NULL; 367fb2f18f8Sesaxe } 368fb2f18f8Sesaxe } 369fb2f18f8Sesaxe 370fb2f18f8Sesaxe /* 371fb2f18f8Sesaxe * Create a new, empty hwset. 372fb2f18f8Sesaxe * This routine may block, and must not be called from any 373fb2f18f8Sesaxe * paused CPU context. 374fb2f18f8Sesaxe */ 375fb2f18f8Sesaxe static group_t * 376fb2f18f8Sesaxe pghw_set_create(pghw_type_t hw) 377fb2f18f8Sesaxe { 378fb2f18f8Sesaxe group_t *g; 379fb2f18f8Sesaxe int ret; 380fb2f18f8Sesaxe 381fb2f18f8Sesaxe /* 382fb2f18f8Sesaxe * Create the top level PG hw group if it doesn't already exist 383fb2f18f8Sesaxe * This is a "set" of hardware sets, that is ordered (and indexed) 384fb2f18f8Sesaxe * by the pghw_type_t enum. 385fb2f18f8Sesaxe */ 386fb2f18f8Sesaxe if (pg_hw == NULL) { 387fb2f18f8Sesaxe pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP); 388fb2f18f8Sesaxe group_create(pg_hw); 389fb2f18f8Sesaxe group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS); 390fb2f18f8Sesaxe } 391fb2f18f8Sesaxe 392fb2f18f8Sesaxe /* 393fb2f18f8Sesaxe * Create the new hwset 394fb2f18f8Sesaxe * Add it to the top level pg_hw group. 395fb2f18f8Sesaxe */ 396fb2f18f8Sesaxe g = kmem_alloc(sizeof (group_t), KM_SLEEP); 397fb2f18f8Sesaxe group_create(g); 398fb2f18f8Sesaxe 399fb2f18f8Sesaxe ret = group_add_at(pg_hw, g, (uint_t)hw); 400fb2f18f8Sesaxe ASSERT(ret == 0); 401fb2f18f8Sesaxe 402fb2f18f8Sesaxe return (g); 403fb2f18f8Sesaxe } 404fb2f18f8Sesaxe 405fb2f18f8Sesaxe /* 406fb2f18f8Sesaxe * Find the hwset associated with the given hardware sharing type 407fb2f18f8Sesaxe */ 408fb2f18f8Sesaxe group_t * 409fb2f18f8Sesaxe pghw_set_lookup(pghw_type_t hw) 410fb2f18f8Sesaxe { 411fb2f18f8Sesaxe group_t *hwset; 412fb2f18f8Sesaxe 413fb2f18f8Sesaxe if (pg_hw == NULL) 414fb2f18f8Sesaxe return (NULL); 415fb2f18f8Sesaxe 416fb2f18f8Sesaxe hwset = GROUP_ACCESS(pg_hw, (uint_t)hw); 417fb2f18f8Sesaxe return (hwset); 418fb2f18f8Sesaxe } 419fb2f18f8Sesaxe 420fb2f18f8Sesaxe /* 421fb2f18f8Sesaxe * Add a PG to a hwset 422fb2f18f8Sesaxe */ 423fb2f18f8Sesaxe static void 424fb2f18f8Sesaxe pghw_set_add(group_t *hwset, pghw_t *pg) 425fb2f18f8Sesaxe { 426fb2f18f8Sesaxe (void) group_add(hwset, pg, GRP_RESIZE); 427fb2f18f8Sesaxe } 428fb2f18f8Sesaxe 429fb2f18f8Sesaxe /* 430fb2f18f8Sesaxe * Remove a PG from a hwset 431fb2f18f8Sesaxe */ 432fb2f18f8Sesaxe static void 433fb2f18f8Sesaxe pghw_set_remove(group_t *hwset, pghw_t *pg) 434fb2f18f8Sesaxe { 435fb2f18f8Sesaxe int result; 436fb2f18f8Sesaxe 437fb2f18f8Sesaxe result = group_remove(hwset, pg, GRP_RESIZE); 438fb2f18f8Sesaxe ASSERT(result == 0); 439fb2f18f8Sesaxe } 440fb2f18f8Sesaxe 441fb2f18f8Sesaxe /* 442fb2f18f8Sesaxe * Return a string name given a pg_hw sharing type 443fb2f18f8Sesaxe */ 444*b885580bSAlexander Kolbasov char * 445fb2f18f8Sesaxe pghw_type_string(pghw_type_t hw) 446fb2f18f8Sesaxe { 447fb2f18f8Sesaxe switch (hw) { 448fb2f18f8Sesaxe case PGHW_IPIPE: 4490e751525SEric Saxe return ("Integer Pipeline"); 450fb2f18f8Sesaxe case PGHW_CACHE: 4510e751525SEric Saxe return ("Cache"); 452fb2f18f8Sesaxe case PGHW_FPU: 4530e751525SEric Saxe return ("Floating Point Unit"); 454e853d8c3Sjc25722 case PGHW_MPIPE: 4550e751525SEric Saxe return ("Data Pipe to memory"); 456fb2f18f8Sesaxe case PGHW_CHIP: 4570e751525SEric Saxe return ("Socket"); 458fb2f18f8Sesaxe case PGHW_MEMORY: 4590e751525SEric Saxe return ("Memory"); 4600e751525SEric Saxe case PGHW_POW_ACTIVE: 4610e751525SEric Saxe return ("CPU PM Active Power Domain"); 4620e751525SEric Saxe case PGHW_POW_IDLE: 4630e751525SEric Saxe return ("CPU PM Idle Power Domain"); 464fb2f18f8Sesaxe default: 465fb2f18f8Sesaxe return ("unknown"); 466fb2f18f8Sesaxe } 467fb2f18f8Sesaxe } 468fb2f18f8Sesaxe 469fb2f18f8Sesaxe /* 470*b885580bSAlexander Kolbasov * Return a short string name given a pg_hw sharing type 471*b885580bSAlexander Kolbasov */ 472*b885580bSAlexander Kolbasov char * 473*b885580bSAlexander Kolbasov pghw_type_shortstring(pghw_type_t hw) 474*b885580bSAlexander Kolbasov { 475*b885580bSAlexander Kolbasov switch (hw) { 476*b885580bSAlexander Kolbasov case PGHW_IPIPE: 477*b885580bSAlexander Kolbasov return ("instr_pipeline"); 478*b885580bSAlexander Kolbasov case PGHW_CACHE: 479*b885580bSAlexander Kolbasov return ("Cache"); 480*b885580bSAlexander Kolbasov case PGHW_FPU: 481*b885580bSAlexander Kolbasov return ("FPU"); 482*b885580bSAlexander Kolbasov case PGHW_MPIPE: 483*b885580bSAlexander Kolbasov return ("memory_pipeline"); 484*b885580bSAlexander Kolbasov case PGHW_CHIP: 485*b885580bSAlexander Kolbasov return ("Socket"); 486*b885580bSAlexander Kolbasov case PGHW_MEMORY: 487*b885580bSAlexander Kolbasov return ("Memory"); 488*b885580bSAlexander Kolbasov case PGHW_POW_ACTIVE: 489*b885580bSAlexander Kolbasov return ("CPU_PM_Active"); 490*b885580bSAlexander Kolbasov case PGHW_POW_IDLE: 491*b885580bSAlexander Kolbasov return ("CPU_PM_Idle"); 492*b885580bSAlexander Kolbasov default: 493*b885580bSAlexander Kolbasov return ("unknown"); 494*b885580bSAlexander Kolbasov } 495*b885580bSAlexander Kolbasov } 496*b885580bSAlexander Kolbasov 497*b885580bSAlexander Kolbasov /* 498fb2f18f8Sesaxe * Create / Update routines for PG hw kstats 499fb2f18f8Sesaxe * 500fb2f18f8Sesaxe * It is the intention of these kstats to provide some level 501fb2f18f8Sesaxe * of informational / debugging observability into the types 502fb2f18f8Sesaxe * and nature of the system's detected hardware sharing relationships 503fb2f18f8Sesaxe */ 504fb2f18f8Sesaxe void 505fb2f18f8Sesaxe pghw_kstat_create(pghw_t *pg) 506fb2f18f8Sesaxe { 507*b885580bSAlexander Kolbasov char *class = pghw_type_string(pg->pghw_hw); 508*b885580bSAlexander Kolbasov 509fb2f18f8Sesaxe /* 510fb2f18f8Sesaxe * Create a physical pg kstat 511fb2f18f8Sesaxe */ 512fb2f18f8Sesaxe if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, 513*b885580bSAlexander Kolbasov "pg", "pg", 514*b885580bSAlexander Kolbasov KSTAT_TYPE_NAMED, 515fb2f18f8Sesaxe sizeof (pghw_kstat) / sizeof (kstat_named_t), 516fb2f18f8Sesaxe KSTAT_FLAG_VIRTUAL)) != NULL) { 5170e751525SEric Saxe /* Class string, hw string, and policy string */ 518fb2f18f8Sesaxe pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX; 5190e751525SEric Saxe pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 5200e751525SEric Saxe pg->pghw_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 521fb2f18f8Sesaxe pg->pghw_kstat->ks_lock = &pghw_kstat_lock; 522fb2f18f8Sesaxe pg->pghw_kstat->ks_data = &pghw_kstat; 523fb2f18f8Sesaxe pg->pghw_kstat->ks_update = pghw_kstat_update; 524fb2f18f8Sesaxe pg->pghw_kstat->ks_private = pg; 525fb2f18f8Sesaxe kstat_install(pg->pghw_kstat); 526fb2f18f8Sesaxe } 527*b885580bSAlexander Kolbasov 528*b885580bSAlexander Kolbasov if (pg_cpulist_maxlen == 0) 529*b885580bSAlexander Kolbasov pg_cpulist_maxlen = CPUSTR_LEN(max_ncpus); 530*b885580bSAlexander Kolbasov 531*b885580bSAlexander Kolbasov /* 532*b885580bSAlexander Kolbasov * Create a physical pg kstat 533*b885580bSAlexander Kolbasov */ 534*b885580bSAlexander Kolbasov if ((pg->pghw_cu_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, 535*b885580bSAlexander Kolbasov "hardware", class, 536*b885580bSAlexander Kolbasov KSTAT_TYPE_NAMED, 537*b885580bSAlexander Kolbasov sizeof (pghw_cu_kstat) / sizeof (kstat_named_t), 538*b885580bSAlexander Kolbasov KSTAT_FLAG_VIRTUAL)) != NULL) { 539*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_lock = &pghw_kstat_lock; 540*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat; 541*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update; 542*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_private = pg; 543*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_data_size += strlen(class) + 1; 544*b885580bSAlexander Kolbasov /* Allow space for CPU strings */ 545*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; 546*b885580bSAlexander Kolbasov pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen; 547*b885580bSAlexander Kolbasov kstat_install(pg->pghw_cu_kstat); 548*b885580bSAlexander Kolbasov } 549fb2f18f8Sesaxe } 550fb2f18f8Sesaxe 551fb2f18f8Sesaxe int 552fb2f18f8Sesaxe pghw_kstat_update(kstat_t *ksp, int rw) 553fb2f18f8Sesaxe { 554fb2f18f8Sesaxe struct pghw_kstat *pgsp = &pghw_kstat; 555fb2f18f8Sesaxe pghw_t *pg = ksp->ks_private; 556fb2f18f8Sesaxe 557fb2f18f8Sesaxe if (rw == KSTAT_WRITE) 558fb2f18f8Sesaxe return (EACCES); 559fb2f18f8Sesaxe 560*b885580bSAlexander Kolbasov pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id; 561*b885580bSAlexander Kolbasov pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 562*b885580bSAlexander Kolbasov pgsp->pg_instance_id.value.ui32 = pg->pghw_instance; 563fb2f18f8Sesaxe kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name); 564fb2f18f8Sesaxe kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw)); 5650e751525SEric Saxe kstat_named_setstr(&pgsp->pg_policy, pg_policy_name((pg_t *)pg)); 566fb2f18f8Sesaxe return (0); 567fb2f18f8Sesaxe } 568*b885580bSAlexander Kolbasov 569*b885580bSAlexander Kolbasov int 570*b885580bSAlexander Kolbasov pghw_cu_kstat_update(kstat_t *ksp, int rw) 571*b885580bSAlexander Kolbasov { 572*b885580bSAlexander Kolbasov struct pghw_cu_kstat *pgsp = &pghw_cu_kstat; 573*b885580bSAlexander Kolbasov pghw_t *pg = ksp->ks_private; 574*b885580bSAlexander Kolbasov pghw_util_t *hw_util = &pg->pghw_stats; 575*b885580bSAlexander Kolbasov 576*b885580bSAlexander Kolbasov if (rw == KSTAT_WRITE) 577*b885580bSAlexander Kolbasov return (EACCES); 578*b885580bSAlexander Kolbasov 579*b885580bSAlexander Kolbasov pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id; 580*b885580bSAlexander Kolbasov pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 581*b885580bSAlexander Kolbasov 582*b885580bSAlexander Kolbasov /* 583*b885580bSAlexander Kolbasov * Allocate memory for the string representing the list of CPUs in PG. 584*b885580bSAlexander Kolbasov * This memory should persist past the call to pghw_cu_kstat_update() 585*b885580bSAlexander Kolbasov * since the kstat snapshot routine will reference this memory. 586*b885580bSAlexander Kolbasov */ 587*b885580bSAlexander Kolbasov pghw_cpulist_alloc(pg); 588*b885580bSAlexander Kolbasov 589*b885580bSAlexander Kolbasov if (pg->pghw_kstat_gen != pg->pghw_generation) { 590*b885580bSAlexander Kolbasov /* 591*b885580bSAlexander Kolbasov * PG kstat generation number is out of sync with PG's 592*b885580bSAlexander Kolbasov * generation mumber. It means that some CPUs could have joined 593*b885580bSAlexander Kolbasov * or left PG and it is not possible to compare the numbers 594*b885580bSAlexander Kolbasov * obtained before and after the generation change. 595*b885580bSAlexander Kolbasov * 596*b885580bSAlexander Kolbasov * Reset the maximum utilization rate and start computing it 597*b885580bSAlexander Kolbasov * from scratch. 598*b885580bSAlexander Kolbasov */ 599*b885580bSAlexander Kolbasov hw_util->pghw_util = 0; 600*b885580bSAlexander Kolbasov hw_util->pghw_rate_max = 0; 601*b885580bSAlexander Kolbasov pg->pghw_kstat_gen = pg->pghw_generation; 602*b885580bSAlexander Kolbasov } 603*b885580bSAlexander Kolbasov 604*b885580bSAlexander Kolbasov /* 605*b885580bSAlexander Kolbasov * We can't block on CPU lock because when PG is destroyed (under 606*b885580bSAlexander Kolbasov * cpu_lock) it tries to delete this kstat and it will wait for us to 607*b885580bSAlexander Kolbasov * complete which will never happen since we are waiting for cpu_lock to 608*b885580bSAlexander Kolbasov * drop. Deadlocks are fun! 609*b885580bSAlexander Kolbasov */ 610*b885580bSAlexander Kolbasov if (mutex_tryenter(&cpu_lock)) { 611*b885580bSAlexander Kolbasov if (pg->pghw_cpulist != NULL && 612*b885580bSAlexander Kolbasov *(pg->pghw_cpulist) == '\0') { 613*b885580bSAlexander Kolbasov (void) group2intlist(&(((pg_t *)pg)->pg_cpus), 614*b885580bSAlexander Kolbasov pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id); 615*b885580bSAlexander Kolbasov } 616*b885580bSAlexander Kolbasov cu_pg_update(pg); 617*b885580bSAlexander Kolbasov mutex_exit(&cpu_lock); 618*b885580bSAlexander Kolbasov } 619*b885580bSAlexander Kolbasov 620*b885580bSAlexander Kolbasov pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen; 621*b885580bSAlexander Kolbasov pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util; 622*b885580bSAlexander Kolbasov pgsp->pg_hw_util_time_running.value.ui64 = hw_util->pghw_time_running; 623*b885580bSAlexander Kolbasov pgsp->pg_hw_util_time_stopped.value.ui64 = hw_util->pghw_time_stopped; 624*b885580bSAlexander Kolbasov pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate; 625*b885580bSAlexander Kolbasov pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max; 626*b885580bSAlexander Kolbasov if (pg->pghw_cpulist != NULL) 627*b885580bSAlexander Kolbasov kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist); 628*b885580bSAlexander Kolbasov else 629*b885580bSAlexander Kolbasov kstat_named_setstr(&pgsp->pg_cpus, ""); 630*b885580bSAlexander Kolbasov 631*b885580bSAlexander Kolbasov kstat_named_setstr(&pgsp->pg_sharing, pghw_type_string(pg->pghw_hw)); 632*b885580bSAlexander Kolbasov 633*b885580bSAlexander Kolbasov return (0); 634*b885580bSAlexander Kolbasov } 635*b885580bSAlexander Kolbasov 636*b885580bSAlexander Kolbasov /* 637*b885580bSAlexander Kolbasov * Update the string representation of CPUs in PG (pg->pghw_cpulist). 638*b885580bSAlexander Kolbasov * The string representation is used for kstats. 639*b885580bSAlexander Kolbasov * 640*b885580bSAlexander Kolbasov * The string is allocated if it has not already been or if it is already 641*b885580bSAlexander Kolbasov * allocated and PG has more CPUs now. If PG has smaller or equal number of 642*b885580bSAlexander Kolbasov * CPUs, but the actual CPUs may have changed, the string is reset to the empty 643*b885580bSAlexander Kolbasov * string causes the string representation to be recreated. The pghw_generation 644*b885580bSAlexander Kolbasov * field is used to detect whether CPUs within the pg may have changed. 645*b885580bSAlexander Kolbasov */ 646*b885580bSAlexander Kolbasov static void 647*b885580bSAlexander Kolbasov pghw_cpulist_alloc(pghw_t *pg) 648*b885580bSAlexander Kolbasov { 649*b885580bSAlexander Kolbasov uint_t ncpus = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); 650*b885580bSAlexander Kolbasov size_t len = CPUSTR_LEN(ncpus); 651*b885580bSAlexander Kolbasov 652*b885580bSAlexander Kolbasov /* 653*b885580bSAlexander Kolbasov * If the pghw_cpulist string is already allocated we need to make sure 654*b885580bSAlexander Kolbasov * that it has sufficient length. Also if the set of CPUs may have 655*b885580bSAlexander Kolbasov * changed, we need to re-generate the string. 656*b885580bSAlexander Kolbasov */ 657*b885580bSAlexander Kolbasov if (pg->pghw_cpulist != NULL && 658*b885580bSAlexander Kolbasov pg->pghw_kstat_gen != pg->pghw_generation) { 659*b885580bSAlexander Kolbasov if (len <= pg->pghw_cpulist_len) { 660*b885580bSAlexander Kolbasov /* 661*b885580bSAlexander Kolbasov * There is sufficient space in the pghw_cpulist for 662*b885580bSAlexander Kolbasov * the new set of CPUs. Just clear the string to trigger 663*b885580bSAlexander Kolbasov * re-generation of list of CPUs 664*b885580bSAlexander Kolbasov */ 665*b885580bSAlexander Kolbasov *(pg->pghw_cpulist) = '\0'; 666*b885580bSAlexander Kolbasov } else { 667*b885580bSAlexander Kolbasov /* 668*b885580bSAlexander Kolbasov * There is, potentially, insufficient space in 669*b885580bSAlexander Kolbasov * pghw_cpulist, so reallocate the string. 670*b885580bSAlexander Kolbasov */ 671*b885580bSAlexander Kolbasov ASSERT(strlen(pg->pghw_cpulist) < pg->pghw_cpulist_len); 672*b885580bSAlexander Kolbasov kmem_free(pg->pghw_cpulist, pg->pghw_cpulist_len); 673*b885580bSAlexander Kolbasov pg->pghw_cpulist = NULL; 674*b885580bSAlexander Kolbasov pg->pghw_cpulist_len = 0; 675*b885580bSAlexander Kolbasov } 676*b885580bSAlexander Kolbasov } 677*b885580bSAlexander Kolbasov 678*b885580bSAlexander Kolbasov if (pg->pghw_cpulist == NULL) { 679*b885580bSAlexander Kolbasov /* 680*b885580bSAlexander Kolbasov * Allocate space to hold cpulist. 681*b885580bSAlexander Kolbasov * 682*b885580bSAlexander Kolbasov * Length can not be bigger that the maximum space we have 683*b885580bSAlexander Kolbasov * allowed for the kstat buffer 684*b885580bSAlexander Kolbasov */ 685*b885580bSAlexander Kolbasov if (len > pg_cpulist_maxlen) 686*b885580bSAlexander Kolbasov len = pg_cpulist_maxlen; 687*b885580bSAlexander Kolbasov if (len > 0) { 688*b885580bSAlexander Kolbasov pg->pghw_cpulist = kmem_zalloc(len, KM_NOSLEEP); 689*b885580bSAlexander Kolbasov if (pg->pghw_cpulist != NULL) 690*b885580bSAlexander Kolbasov pg->pghw_cpulist_len = len; 691*b885580bSAlexander Kolbasov } 692*b885580bSAlexander Kolbasov } 693*b885580bSAlexander Kolbasov } 694*b885580bSAlexander Kolbasov 695*b885580bSAlexander Kolbasov static int 696*b885580bSAlexander Kolbasov cpu2id(void *v) 697*b885580bSAlexander Kolbasov { 698*b885580bSAlexander Kolbasov cpu_t *cp = (cpu_t *)v; 699*b885580bSAlexander Kolbasov 700*b885580bSAlexander Kolbasov ASSERT(v != NULL); 701*b885580bSAlexander Kolbasov 702*b885580bSAlexander Kolbasov return (cp->cpu_id); 703*b885580bSAlexander Kolbasov } 704