1fb2f18f8Sesaxe /* 2fb2f18f8Sesaxe * CDDL HEADER START 3fb2f18f8Sesaxe * 4fb2f18f8Sesaxe * The contents of this file are subject to the terms of the 5fb2f18f8Sesaxe * Common Development and Distribution License (the "License"). 6fb2f18f8Sesaxe * You may not use this file except in compliance with the License. 7fb2f18f8Sesaxe * 8fb2f18f8Sesaxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fb2f18f8Sesaxe * or http://www.opensolaris.org/os/licensing. 10fb2f18f8Sesaxe * See the License for the specific language governing permissions 11fb2f18f8Sesaxe * and limitations under the License. 12fb2f18f8Sesaxe * 13fb2f18f8Sesaxe * When distributing Covered Code, include this CDDL HEADER in each 14fb2f18f8Sesaxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fb2f18f8Sesaxe * If applicable, add the following below this CDDL HEADER, with the 16fb2f18f8Sesaxe * fields enclosed by brackets "[]" replaced with your own identifying 17fb2f18f8Sesaxe * information: Portions Copyright [yyyy] [name of copyright owner] 18fb2f18f8Sesaxe * 19fb2f18f8Sesaxe * CDDL HEADER END 20fb2f18f8Sesaxe */ 21fb2f18f8Sesaxe /* 220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23fb2f18f8Sesaxe * Use is subject to license terms. 
24fb2f18f8Sesaxe */ 25fb2f18f8Sesaxe 26fb2f18f8Sesaxe #include <sys/systm.h> 27fb2f18f8Sesaxe #include <sys/types.h> 28fb2f18f8Sesaxe #include <sys/param.h> 29fb2f18f8Sesaxe #include <sys/thread.h> 30fb2f18f8Sesaxe #include <sys/cpuvar.h> 31fb2f18f8Sesaxe #include <sys/cpupart.h> 32fb2f18f8Sesaxe #include <sys/kmem.h> 33fb2f18f8Sesaxe #include <sys/cmn_err.h> 34fb2f18f8Sesaxe #include <sys/kstat.h> 35fb2f18f8Sesaxe #include <sys/processor.h> 36fb2f18f8Sesaxe #include <sys/disp.h> 37fb2f18f8Sesaxe #include <sys/group.h> 38fb2f18f8Sesaxe #include <sys/pg.h> 39fb2f18f8Sesaxe 40fb2f18f8Sesaxe /* 41fb2f18f8Sesaxe * Processor groups 42fb2f18f8Sesaxe * 43fb2f18f8Sesaxe * With the introduction of Chip Multi-Threaded (CMT) processor architectures, 44fb2f18f8Sesaxe * it is no longer necessarily true that a given physical processor module 45fb2f18f8Sesaxe * will present itself as a single schedulable entity (cpu_t). Rather, each 46fb2f18f8Sesaxe * chip and/or processor core may present itself as one or more "logical" CPUs. 47fb2f18f8Sesaxe * 48fb2f18f8Sesaxe * The logical CPUs presented may share physical components such as caches, 49fb2f18f8Sesaxe * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the 50fb2f18f8Sesaxe * kernel be aware of the relationships existing between logical CPUs so that 51fb2f18f8Sesaxe * the appropriate optimizations may be employed. 52fb2f18f8Sesaxe * 53fb2f18f8Sesaxe * The processor group abstraction represents a set of logical CPUs that 54fb2f18f8Sesaxe * generally share some sort of physical or characteristic relationship. 55fb2f18f8Sesaxe * 56fb2f18f8Sesaxe * In the case of a physical sharing relationship, the CPUs in the group may 57fb2f18f8Sesaxe * share a pipeline, cache or floating point unit. In the case of a logical 58fb2f18f8Sesaxe * relationship, a PG may represent the set of CPUs in a processor set, or the 59fb2f18f8Sesaxe * set of CPUs running at a particular clock speed. 
60fb2f18f8Sesaxe * 61fb2f18f8Sesaxe * The generic processor group structure, pg_t, contains the elements generic 62fb2f18f8Sesaxe * to a group of CPUs. Depending on the nature of the CPU relationship 63fb2f18f8Sesaxe * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that 64fb2f18f8Sesaxe * PG where more specific data is represented. 65fb2f18f8Sesaxe * 66fb2f18f8Sesaxe * As an example, a PG representing a PHYSICAL relationship, may be recast to 67fb2f18f8Sesaxe * a pghw_t, where data further describing the hardware sharing relationship 68fb2f18f8Sesaxe * is maintained. See pghw.c and pghw.h for details on physical PGs. 69fb2f18f8Sesaxe * 70fb2f18f8Sesaxe * At this time a more specialized casting of a PG representing a LOGICAL 71fb2f18f8Sesaxe * relationship has not been implemented, but the architecture allows for this 72fb2f18f8Sesaxe * in the future. 73fb2f18f8Sesaxe * 74fb2f18f8Sesaxe * Processor Group Classes 75fb2f18f8Sesaxe * 76fb2f18f8Sesaxe * Processor group consumers may wish to maintain and associate specific 77fb2f18f8Sesaxe * data with the PGs they create. For this reason, a mechanism for creating 78fb2f18f8Sesaxe * class specific PGs exists. Classes may overload the default functions for 79fb2f18f8Sesaxe * creating, destroying, and associating CPUs with PGs, and may also register 80fb2f18f8Sesaxe * class specific callbacks to be invoked when the CPU related system 81fb2f18f8Sesaxe * configuration changes. Class specific data is stored/associated with 82fb2f18f8Sesaxe * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first 83fb2f18f8Sesaxe * element of a class specific PG object. 
In memory, such a structure may look 84fb2f18f8Sesaxe * like: 85fb2f18f8Sesaxe * 86fb2f18f8Sesaxe * ----------------------- - - - 87fb2f18f8Sesaxe * | common | | | | <--(pg_t *) 88fb2f18f8Sesaxe * ----------------------- | | - 89fb2f18f8Sesaxe * | HW specific | | | <-----(pghw_t *) 90fb2f18f8Sesaxe * ----------------------- | - 91fb2f18f8Sesaxe * | class specific | | <-------(pg_cmt_t *) 92fb2f18f8Sesaxe * ----------------------- - 93fb2f18f8Sesaxe * 94fb2f18f8Sesaxe * Access to the PG class specific data can be had by casting a pointer to 95fb2f18f8Sesaxe * its class specific view. 96fb2f18f8Sesaxe */ 97fb2f18f8Sesaxe 98fb2f18f8Sesaxe static pg_t *pg_alloc_default(pg_class_t); 99fb2f18f8Sesaxe static void pg_free_default(pg_t *); 1000e751525SEric Saxe static void pg_null_op(); 101fb2f18f8Sesaxe 102fb2f18f8Sesaxe /* 103fb2f18f8Sesaxe * Bootstrap CPU specific PG data 104fb2f18f8Sesaxe * See pg_cpu_bootstrap() 105fb2f18f8Sesaxe */ 106fb2f18f8Sesaxe static cpu_pg_t bootstrap_pg_data; 107fb2f18f8Sesaxe 108fb2f18f8Sesaxe /* 109fb2f18f8Sesaxe * Bitset of allocated PG ids (they are sequential) 110fb2f18f8Sesaxe * and the next free id in the set. 
111fb2f18f8Sesaxe */ 112fb2f18f8Sesaxe static bitset_t pg_id_set; 113fb2f18f8Sesaxe static pgid_t pg_id_next = 0; 114fb2f18f8Sesaxe 115fb2f18f8Sesaxe /* 116fb2f18f8Sesaxe * Default and externed PG ops vectors 117fb2f18f8Sesaxe */ 118fb2f18f8Sesaxe static struct pg_ops pg_ops_default = { 119fb2f18f8Sesaxe pg_alloc_default, /* alloc */ 120fb2f18f8Sesaxe pg_free_default, /* free */ 121fb2f18f8Sesaxe NULL, /* cpu_init */ 122fb2f18f8Sesaxe NULL, /* cpu_fini */ 123fb2f18f8Sesaxe NULL, /* cpu_active */ 124fb2f18f8Sesaxe NULL, /* cpu_inactive */ 125fb2f18f8Sesaxe NULL, /* cpupart_in */ 126fb2f18f8Sesaxe NULL, /* cpupart_out */ 127fb2f18f8Sesaxe NULL, /* cpupart_move */ 128fb2f18f8Sesaxe NULL, /* cpu_belongs */ 1290e751525SEric Saxe NULL, /* policy_name */ 1300e751525SEric Saxe }; 1310e751525SEric Saxe 1320e751525SEric Saxe static struct pg_cb_ops pg_cb_ops_default = { 1330e751525SEric Saxe pg_null_op, /* thread_swtch */ 1340e751525SEric Saxe pg_null_op, /* thread_remain */ 135fb2f18f8Sesaxe }; 136fb2f18f8Sesaxe 137fb2f18f8Sesaxe /* 138fb2f18f8Sesaxe * Class specific PG allocation callbacks 139fb2f18f8Sesaxe */ 140fb2f18f8Sesaxe #define PG_ALLOC(class) \ 141fb2f18f8Sesaxe (pg_classes[class].pgc_ops->alloc ? \ 142fb2f18f8Sesaxe pg_classes[class].pgc_ops->alloc() : \ 143fb2f18f8Sesaxe pg_classes[pg_default_cid].pgc_ops->alloc()) 144fb2f18f8Sesaxe 145fb2f18f8Sesaxe #define PG_FREE(pg) \ 146fb2f18f8Sesaxe ((pg)->pg_class->pgc_ops->free ? \ 147fb2f18f8Sesaxe (pg)->pg_class->pgc_ops->free(pg) : \ 148fb2f18f8Sesaxe pg_classes[pg_default_cid].pgc_ops->free(pg)) \ 149fb2f18f8Sesaxe 150fb2f18f8Sesaxe 151fb2f18f8Sesaxe /* 1520e751525SEric Saxe * Class specific PG policy name 1530e751525SEric Saxe */ 1540e751525SEric Saxe #define PG_POLICY_NAME(pg) \ 1550e751525SEric Saxe ((pg)->pg_class->pgc_ops->policy_name ? 
\ 1560e751525SEric Saxe (pg)->pg_class->pgc_ops->policy_name(pg) : NULL) \ 1570e751525SEric Saxe 1580e751525SEric Saxe /* 159fb2f18f8Sesaxe * Class specific membership test callback 160fb2f18f8Sesaxe */ 161fb2f18f8Sesaxe #define PG_CPU_BELONGS(pg, cp) \ 162fb2f18f8Sesaxe ((pg)->pg_class->pgc_ops->cpu_belongs ? \ 163fb2f18f8Sesaxe (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0) \ 164fb2f18f8Sesaxe 165fb2f18f8Sesaxe /* 166fb2f18f8Sesaxe * CPU configuration callbacks 167fb2f18f8Sesaxe */ 16847ab0c7cSEric Saxe #define PG_CPU_INIT(class, cp, cpu_pg) \ 169fb2f18f8Sesaxe { \ 170fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpu_init) \ 17147ab0c7cSEric Saxe pg_classes[class].pgc_ops->cpu_init(cp, cpu_pg); \ 172fb2f18f8Sesaxe } 173fb2f18f8Sesaxe 17447ab0c7cSEric Saxe #define PG_CPU_FINI(class, cp, cpu_pg) \ 175fb2f18f8Sesaxe { \ 176fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpu_fini) \ 17747ab0c7cSEric Saxe pg_classes[class].pgc_ops->cpu_fini(cp, cpu_pg); \ 178fb2f18f8Sesaxe } 179fb2f18f8Sesaxe 180fb2f18f8Sesaxe #define PG_CPU_ACTIVE(class, cp) \ 181fb2f18f8Sesaxe { \ 182fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpu_active) \ 183fb2f18f8Sesaxe pg_classes[class].pgc_ops->cpu_active(cp); \ 184fb2f18f8Sesaxe } 185fb2f18f8Sesaxe 186fb2f18f8Sesaxe #define PG_CPU_INACTIVE(class, cp) \ 187fb2f18f8Sesaxe { \ 188fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpu_inactive) \ 189fb2f18f8Sesaxe pg_classes[class].pgc_ops->cpu_inactive(cp); \ 190fb2f18f8Sesaxe } 191fb2f18f8Sesaxe 192fb2f18f8Sesaxe /* 193fb2f18f8Sesaxe * CPU / cpupart configuration callbacks 194fb2f18f8Sesaxe */ 195fb2f18f8Sesaxe #define PG_CPUPART_IN(class, cp, pp) \ 196fb2f18f8Sesaxe { \ 197fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpupart_in) \ 198fb2f18f8Sesaxe pg_classes[class].pgc_ops->cpupart_in(cp, pp); \ 199fb2f18f8Sesaxe } 200fb2f18f8Sesaxe 201fb2f18f8Sesaxe #define PG_CPUPART_OUT(class, cp, pp) \ 202fb2f18f8Sesaxe { \ 203fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpupart_out) \ 204fb2f18f8Sesaxe 
pg_classes[class].pgc_ops->cpupart_out(cp, pp); \ 205fb2f18f8Sesaxe } 206fb2f18f8Sesaxe 207fb2f18f8Sesaxe #define PG_CPUPART_MOVE(class, cp, old, new) \ 208fb2f18f8Sesaxe { \ 209fb2f18f8Sesaxe if (pg_classes[class].pgc_ops->cpupart_move) \ 210fb2f18f8Sesaxe pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \ 211fb2f18f8Sesaxe } 212fb2f18f8Sesaxe 213fb2f18f8Sesaxe 214fb2f18f8Sesaxe 215fb2f18f8Sesaxe static pg_class_t *pg_classes; 216fb2f18f8Sesaxe static int pg_nclasses; 217fb2f18f8Sesaxe 218fb2f18f8Sesaxe static pg_cid_t pg_default_cid; 219fb2f18f8Sesaxe 220fb2f18f8Sesaxe /* 2210e751525SEric Saxe * Initialze common PG subsystem. 222fb2f18f8Sesaxe */ 223fb2f18f8Sesaxe void 224fb2f18f8Sesaxe pg_init(void) 225fb2f18f8Sesaxe { 2260e751525SEric Saxe extern void pg_cmt_class_init(); 227*ad7a79fdSEric Saxe extern void pg_cmt_cpu_startup(); 2280e751525SEric Saxe 229fb2f18f8Sesaxe pg_default_cid = 230fb2f18f8Sesaxe pg_class_register("default", &pg_ops_default, PGR_LOGICAL); 2310e751525SEric Saxe 2320e751525SEric Saxe /* 2330e751525SEric Saxe * Initialize classes to allow them to register with the framework 2340e751525SEric Saxe */ 2350e751525SEric Saxe pg_cmt_class_init(); 2360e751525SEric Saxe 2370e751525SEric Saxe pg_cpu0_init(); 238*ad7a79fdSEric Saxe pg_cmt_cpu_startup(CPU); 239fb2f18f8Sesaxe } 240fb2f18f8Sesaxe 241fb2f18f8Sesaxe /* 242fb2f18f8Sesaxe * Perform CPU 0 initialization 243fb2f18f8Sesaxe */ 244fb2f18f8Sesaxe void 245fb2f18f8Sesaxe pg_cpu0_init(void) 246fb2f18f8Sesaxe { 247fb2f18f8Sesaxe extern void pghw_physid_create(); 248fb2f18f8Sesaxe 249fb2f18f8Sesaxe /* 250fb2f18f8Sesaxe * Create the physical ID cache for the boot CPU 251fb2f18f8Sesaxe */ 252fb2f18f8Sesaxe pghw_physid_create(CPU); 253fb2f18f8Sesaxe 254fb2f18f8Sesaxe /* 255fb2f18f8Sesaxe * pg_cpu_* require that cpu_lock be held 256fb2f18f8Sesaxe */ 257fb2f18f8Sesaxe mutex_enter(&cpu_lock); 258fb2f18f8Sesaxe 259fb2f18f8Sesaxe pg_cpu_init(CPU); 260fb2f18f8Sesaxe pg_cpupart_in(CPU, &cp_default); 
261fb2f18f8Sesaxe pg_cpu_active(CPU); 262fb2f18f8Sesaxe 263fb2f18f8Sesaxe mutex_exit(&cpu_lock); 264fb2f18f8Sesaxe } 265fb2f18f8Sesaxe 266fb2f18f8Sesaxe /* 267a6604450Sesaxe * Invoked when topology for CPU0 changes 268a6604450Sesaxe * post pg_cpu0_init(). 269a6604450Sesaxe * 270a6604450Sesaxe * Currently happens as a result of null_proc_lpa 271a6604450Sesaxe * on Starcat. 272a6604450Sesaxe */ 273a6604450Sesaxe void 274a6604450Sesaxe pg_cpu0_reinit(void) 275a6604450Sesaxe { 276a6604450Sesaxe mutex_enter(&cpu_lock); 277a6604450Sesaxe pg_cpu_inactive(CPU); 278a6604450Sesaxe pg_cpupart_out(CPU, &cp_default); 279a6604450Sesaxe pg_cpu_fini(CPU); 280a6604450Sesaxe 281a6604450Sesaxe pg_cpu_init(CPU); 282a6604450Sesaxe pg_cpupart_in(CPU, &cp_default); 283a6604450Sesaxe pg_cpu_active(CPU); 284a6604450Sesaxe mutex_exit(&cpu_lock); 285a6604450Sesaxe } 286a6604450Sesaxe 287a6604450Sesaxe /* 288fb2f18f8Sesaxe * Register a new PG class 289fb2f18f8Sesaxe */ 290fb2f18f8Sesaxe pg_cid_t 291fb2f18f8Sesaxe pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation) 292fb2f18f8Sesaxe { 293fb2f18f8Sesaxe pg_class_t *newclass; 294fb2f18f8Sesaxe pg_class_t *classes_old; 295fb2f18f8Sesaxe id_t cid; 296fb2f18f8Sesaxe 297fb2f18f8Sesaxe mutex_enter(&cpu_lock); 298fb2f18f8Sesaxe 299fb2f18f8Sesaxe /* 300fb2f18f8Sesaxe * Allocate a new pg_class_t in the pg_classes array 301fb2f18f8Sesaxe */ 302fb2f18f8Sesaxe if (pg_nclasses == 0) { 303fb2f18f8Sesaxe pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP); 304fb2f18f8Sesaxe } else { 305fb2f18f8Sesaxe classes_old = pg_classes; 306fb2f18f8Sesaxe pg_classes = 307fb2f18f8Sesaxe kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1), 308fb2f18f8Sesaxe KM_SLEEP); 309fb2f18f8Sesaxe (void) kcopy(classes_old, pg_classes, 310fb2f18f8Sesaxe sizeof (pg_class_t) * pg_nclasses); 311fb2f18f8Sesaxe kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses); 312fb2f18f8Sesaxe } 313fb2f18f8Sesaxe 314fb2f18f8Sesaxe cid = pg_nclasses++; 315fb2f18f8Sesaxe 
newclass = &pg_classes[cid]; 316fb2f18f8Sesaxe 317fb2f18f8Sesaxe (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX); 318fb2f18f8Sesaxe newclass->pgc_id = cid; 319fb2f18f8Sesaxe newclass->pgc_ops = ops; 320fb2f18f8Sesaxe newclass->pgc_relation = relation; 321fb2f18f8Sesaxe 322fb2f18f8Sesaxe mutex_exit(&cpu_lock); 323fb2f18f8Sesaxe 324fb2f18f8Sesaxe return (cid); 325fb2f18f8Sesaxe } 326fb2f18f8Sesaxe 327fb2f18f8Sesaxe /* 328fb2f18f8Sesaxe * Try to find an existing pg in set in which to place cp. 329fb2f18f8Sesaxe * Returns the pg if found, and NULL otherwise. 330fb2f18f8Sesaxe * In the event that the CPU could belong to multiple 331fb2f18f8Sesaxe * PGs in the set, the first matching PG will be returned. 332fb2f18f8Sesaxe */ 333fb2f18f8Sesaxe pg_t * 334fb2f18f8Sesaxe pg_cpu_find_pg(cpu_t *cp, group_t *set) 335fb2f18f8Sesaxe { 336fb2f18f8Sesaxe pg_t *pg; 337fb2f18f8Sesaxe group_iter_t i; 338fb2f18f8Sesaxe 339fb2f18f8Sesaxe group_iter_init(&i); 340fb2f18f8Sesaxe while ((pg = group_iterate(set, &i)) != NULL) { 341fb2f18f8Sesaxe /* 342fb2f18f8Sesaxe * Ask the class if the CPU belongs here 343fb2f18f8Sesaxe */ 344fb2f18f8Sesaxe if (PG_CPU_BELONGS(pg, cp)) 345fb2f18f8Sesaxe return (pg); 346fb2f18f8Sesaxe } 347fb2f18f8Sesaxe return (NULL); 348fb2f18f8Sesaxe } 349fb2f18f8Sesaxe 350fb2f18f8Sesaxe /* 351fb2f18f8Sesaxe * Iterate over the CPUs in a PG after initializing 352fb2f18f8Sesaxe * the iterator with PG_CPU_ITR_INIT() 353fb2f18f8Sesaxe */ 354fb2f18f8Sesaxe cpu_t * 355fb2f18f8Sesaxe pg_cpu_next(pg_cpu_itr_t *itr) 356fb2f18f8Sesaxe { 357fb2f18f8Sesaxe cpu_t *cpu; 358fb2f18f8Sesaxe pg_t *pg = itr->pg; 359fb2f18f8Sesaxe 360fb2f18f8Sesaxe cpu = group_iterate(&pg->pg_cpus, &itr->position); 361fb2f18f8Sesaxe return (cpu); 362fb2f18f8Sesaxe } 363fb2f18f8Sesaxe 364fb2f18f8Sesaxe /* 3650e751525SEric Saxe * Test if a given PG contains a given CPU 3660e751525SEric Saxe */ 3670e751525SEric Saxe boolean_t 3680e751525SEric Saxe pg_cpu_find(pg_t *pg, cpu_t *cp) 3690e751525SEric 
Saxe { 3700e751525SEric Saxe if (group_find(&pg->pg_cpus, cp) == (uint_t)-1) 3710e751525SEric Saxe return (B_FALSE); 3720e751525SEric Saxe 3730e751525SEric Saxe return (B_TRUE); 3740e751525SEric Saxe } 3750e751525SEric Saxe 3760e751525SEric Saxe /* 3770e751525SEric Saxe * Set the PGs callbacks to the default 3780e751525SEric Saxe */ 3790e751525SEric Saxe void 3800e751525SEric Saxe pg_callback_set_defaults(pg_t *pg) 3810e751525SEric Saxe { 3820e751525SEric Saxe bcopy(&pg_cb_ops_default, &pg->pg_cb, sizeof (struct pg_cb_ops)); 3830e751525SEric Saxe } 3840e751525SEric Saxe 3850e751525SEric Saxe /* 386fb2f18f8Sesaxe * Create a PG of a given class. 387fb2f18f8Sesaxe * This routine may block. 388fb2f18f8Sesaxe */ 389fb2f18f8Sesaxe pg_t * 390fb2f18f8Sesaxe pg_create(pg_cid_t cid) 391fb2f18f8Sesaxe { 392fb2f18f8Sesaxe pg_t *pg; 393fb2f18f8Sesaxe pgid_t id; 394fb2f18f8Sesaxe 395fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 396fb2f18f8Sesaxe 397fb2f18f8Sesaxe /* 398fb2f18f8Sesaxe * Call the class specific PG allocation routine 399fb2f18f8Sesaxe */ 400fb2f18f8Sesaxe pg = PG_ALLOC(cid); 401fb2f18f8Sesaxe pg->pg_class = &pg_classes[cid]; 402fb2f18f8Sesaxe pg->pg_relation = pg->pg_class->pgc_relation; 403fb2f18f8Sesaxe 404fb2f18f8Sesaxe /* 405fb2f18f8Sesaxe * Find the next free sequential pg id 406fb2f18f8Sesaxe */ 407fb2f18f8Sesaxe do { 408fb2f18f8Sesaxe if (pg_id_next >= bitset_capacity(&pg_id_set)) 409fb2f18f8Sesaxe bitset_resize(&pg_id_set, pg_id_next + 1); 410fb2f18f8Sesaxe id = pg_id_next++; 411fb2f18f8Sesaxe } while (bitset_in_set(&pg_id_set, id)); 412fb2f18f8Sesaxe 413fb2f18f8Sesaxe pg->pg_id = id; 414fb2f18f8Sesaxe bitset_add(&pg_id_set, pg->pg_id); 415fb2f18f8Sesaxe 416fb2f18f8Sesaxe /* 417fb2f18f8Sesaxe * Create the PG's CPU group 418fb2f18f8Sesaxe */ 419fb2f18f8Sesaxe group_create(&pg->pg_cpus); 420fb2f18f8Sesaxe 4210e751525SEric Saxe /* 4220e751525SEric Saxe * Initialize the events ops vector 4230e751525SEric Saxe */ 4240e751525SEric Saxe 
pg_callback_set_defaults(pg); 4250e751525SEric Saxe 426fb2f18f8Sesaxe return (pg); 427fb2f18f8Sesaxe } 428fb2f18f8Sesaxe 429fb2f18f8Sesaxe /* 430fb2f18f8Sesaxe * Destroy a PG. 431fb2f18f8Sesaxe * This routine may block. 432fb2f18f8Sesaxe */ 433fb2f18f8Sesaxe void 434fb2f18f8Sesaxe pg_destroy(pg_t *pg) 435fb2f18f8Sesaxe { 436fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 437fb2f18f8Sesaxe 438fb2f18f8Sesaxe group_destroy(&pg->pg_cpus); 439fb2f18f8Sesaxe 440fb2f18f8Sesaxe /* 441fb2f18f8Sesaxe * Unassign the pg_id 442fb2f18f8Sesaxe */ 443fb2f18f8Sesaxe if (pg_id_next > pg->pg_id) 444fb2f18f8Sesaxe pg_id_next = pg->pg_id; 445fb2f18f8Sesaxe bitset_del(&pg_id_set, pg->pg_id); 446fb2f18f8Sesaxe 447fb2f18f8Sesaxe /* 448fb2f18f8Sesaxe * Invoke the class specific de-allocation routine 449fb2f18f8Sesaxe */ 450fb2f18f8Sesaxe PG_FREE(pg); 451fb2f18f8Sesaxe } 452fb2f18f8Sesaxe 453fb2f18f8Sesaxe /* 454fb2f18f8Sesaxe * Add the CPU "cp" to processor group "pg" 455fb2f18f8Sesaxe * This routine may block. 456fb2f18f8Sesaxe */ 457fb2f18f8Sesaxe void 45847ab0c7cSEric Saxe pg_cpu_add(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg) 459fb2f18f8Sesaxe { 460fb2f18f8Sesaxe int err; 461fb2f18f8Sesaxe 462fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 463fb2f18f8Sesaxe 464fb2f18f8Sesaxe /* This adds the CPU to the PG's CPU group */ 465fb2f18f8Sesaxe err = group_add(&pg->pg_cpus, cp, GRP_RESIZE); 466fb2f18f8Sesaxe ASSERT(err == 0); 467fb2f18f8Sesaxe 46847ab0c7cSEric Saxe /* 46947ab0c7cSEric Saxe * The CPU should be referencing the bootstrap PG data still 47047ab0c7cSEric Saxe * at this point, since this routine may block causing us to 47147ab0c7cSEric Saxe * enter the dispatcher. 
47247ab0c7cSEric Saxe */ 4731a77c24bSEric Saxe ASSERT(pg_cpu_is_bootstrapped(cp)); 47447ab0c7cSEric Saxe 475fb2f18f8Sesaxe /* This adds the PG to the CPUs PG group */ 47647ab0c7cSEric Saxe err = group_add(&cpu_pg->pgs, pg, GRP_RESIZE); 477fb2f18f8Sesaxe ASSERT(err == 0); 478fb2f18f8Sesaxe } 479fb2f18f8Sesaxe 480fb2f18f8Sesaxe /* 481fb2f18f8Sesaxe * Remove "cp" from "pg". 482fb2f18f8Sesaxe * This routine may block. 483fb2f18f8Sesaxe */ 484fb2f18f8Sesaxe void 48547ab0c7cSEric Saxe pg_cpu_delete(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg) 486fb2f18f8Sesaxe { 487fb2f18f8Sesaxe int err; 488fb2f18f8Sesaxe 489fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 490fb2f18f8Sesaxe 491fb2f18f8Sesaxe /* Remove the CPU from the PG */ 492fb2f18f8Sesaxe err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE); 493fb2f18f8Sesaxe ASSERT(err == 0); 494fb2f18f8Sesaxe 49547ab0c7cSEric Saxe /* 49647ab0c7cSEric Saxe * The CPU should be referencing the bootstrap PG data still 49747ab0c7cSEric Saxe * at this point, since this routine may block causing us to 49847ab0c7cSEric Saxe * enter the dispatcher. 49947ab0c7cSEric Saxe */ 5001a77c24bSEric Saxe ASSERT(pg_cpu_is_bootstrapped(cp)); 50147ab0c7cSEric Saxe 502fb2f18f8Sesaxe /* Remove the PG from the CPU's PG group */ 50347ab0c7cSEric Saxe err = group_remove(&cpu_pg->pgs, pg, GRP_RESIZE); 504fb2f18f8Sesaxe ASSERT(err == 0); 505fb2f18f8Sesaxe } 506fb2f18f8Sesaxe 507fb2f18f8Sesaxe /* 508fb2f18f8Sesaxe * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg 509fb2f18f8Sesaxe */ 510fb2f18f8Sesaxe static cpu_pg_t * 511fb2f18f8Sesaxe pg_cpu_data_alloc(void) 512fb2f18f8Sesaxe { 513fb2f18f8Sesaxe cpu_pg_t *pgd; 514fb2f18f8Sesaxe 515fb2f18f8Sesaxe pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP); 516fb2f18f8Sesaxe group_create(&pgd->pgs); 517fb2f18f8Sesaxe group_create(&pgd->cmt_pgs); 518fb2f18f8Sesaxe 519fb2f18f8Sesaxe return (pgd); 520fb2f18f8Sesaxe } 521fb2f18f8Sesaxe 522fb2f18f8Sesaxe /* 523fb2f18f8Sesaxe * Free the CPU's PG data. 
524fb2f18f8Sesaxe */ 525fb2f18f8Sesaxe static void 526fb2f18f8Sesaxe pg_cpu_data_free(cpu_pg_t *pgd) 527fb2f18f8Sesaxe { 528fb2f18f8Sesaxe group_destroy(&pgd->pgs); 529fb2f18f8Sesaxe group_destroy(&pgd->cmt_pgs); 530fb2f18f8Sesaxe kmem_free(pgd, sizeof (cpu_pg_t)); 531fb2f18f8Sesaxe } 532fb2f18f8Sesaxe 533fb2f18f8Sesaxe /* 534fb2f18f8Sesaxe * A new CPU is coming into the system, either via booting or DR. 535fb2f18f8Sesaxe * Allocate it's PG data, and notify all registered classes about 536fb2f18f8Sesaxe * the new CPU. 537fb2f18f8Sesaxe * 538fb2f18f8Sesaxe * This routine may block. 539fb2f18f8Sesaxe */ 540fb2f18f8Sesaxe void 541fb2f18f8Sesaxe pg_cpu_init(cpu_t *cp) 542fb2f18f8Sesaxe { 543fb2f18f8Sesaxe pg_cid_t i; 54447ab0c7cSEric Saxe cpu_pg_t *cpu_pg; 545fb2f18f8Sesaxe 546fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 547fb2f18f8Sesaxe 548fb2f18f8Sesaxe /* 549fb2f18f8Sesaxe * Allocate and size the per CPU pg data 55047ab0c7cSEric Saxe * 55147ab0c7cSEric Saxe * The CPU's PG data will be populated by the various 55247ab0c7cSEric Saxe * PG classes during the invocation of the PG_CPU_INIT() 55347ab0c7cSEric Saxe * callback below. 55447ab0c7cSEric Saxe * 55547ab0c7cSEric Saxe * Since the we could block and enter the dispatcher during 55647ab0c7cSEric Saxe * this process, the CPU will continue to reference the bootstrap 55747ab0c7cSEric Saxe * PG data until all the initialization completes. 558fb2f18f8Sesaxe */ 5591a77c24bSEric Saxe ASSERT(pg_cpu_is_bootstrapped(cp)); 56047ab0c7cSEric Saxe 56147ab0c7cSEric Saxe cpu_pg = pg_cpu_data_alloc(); 562fb2f18f8Sesaxe 563fb2f18f8Sesaxe /* 564fb2f18f8Sesaxe * Notify all registered classes about the new CPU 565fb2f18f8Sesaxe */ 566fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 56747ab0c7cSEric Saxe PG_CPU_INIT(i, cp, cpu_pg); 56847ab0c7cSEric Saxe 56947ab0c7cSEric Saxe /* 57047ab0c7cSEric Saxe * The CPU's PG data is now ready to use. 
57147ab0c7cSEric Saxe */ 57247ab0c7cSEric Saxe cp->cpu_pg = cpu_pg; 573fb2f18f8Sesaxe } 574fb2f18f8Sesaxe 575fb2f18f8Sesaxe /* 576fb2f18f8Sesaxe * This CPU is being deleted from the system. Notify the classes 577fb2f18f8Sesaxe * and free up the CPU's PG data. 578fb2f18f8Sesaxe */ 579fb2f18f8Sesaxe void 580fb2f18f8Sesaxe pg_cpu_fini(cpu_t *cp) 581fb2f18f8Sesaxe { 582fb2f18f8Sesaxe pg_cid_t i; 58347ab0c7cSEric Saxe cpu_pg_t *cpu_pg; 584fb2f18f8Sesaxe 585fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 586fb2f18f8Sesaxe 58747ab0c7cSEric Saxe cpu_pg = cp->cpu_pg; 58847ab0c7cSEric Saxe 589fb2f18f8Sesaxe /* 590fb2f18f8Sesaxe * This can happen if the CPU coming into the system 591fb2f18f8Sesaxe * failed to power on. 592fb2f18f8Sesaxe */ 5931a77c24bSEric Saxe if (cpu_pg == NULL || pg_cpu_is_bootstrapped(cp)) 594fb2f18f8Sesaxe return; 595fb2f18f8Sesaxe 59647ab0c7cSEric Saxe /* 59747ab0c7cSEric Saxe * Have the CPU reference the bootstrap PG data to survive 59847ab0c7cSEric Saxe * the dispatcher should it block from here on out. 59947ab0c7cSEric Saxe */ 6001a77c24bSEric Saxe pg_cpu_bootstrap(cp); 601fb2f18f8Sesaxe 60247ab0c7cSEric Saxe for (i = 0; i < pg_nclasses; i++) 60347ab0c7cSEric Saxe PG_CPU_FINI(i, cp, cpu_pg); 60447ab0c7cSEric Saxe 60547ab0c7cSEric Saxe pg_cpu_data_free(cpu_pg); 606fb2f18f8Sesaxe } 607fb2f18f8Sesaxe 608fb2f18f8Sesaxe /* 609fb2f18f8Sesaxe * This CPU is becoming active (online) 610fb2f18f8Sesaxe * This routine may not block as it is called from paused CPUs 611fb2f18f8Sesaxe * context. 
612fb2f18f8Sesaxe */ 613fb2f18f8Sesaxe void 614fb2f18f8Sesaxe pg_cpu_active(cpu_t *cp) 615fb2f18f8Sesaxe { 616fb2f18f8Sesaxe pg_cid_t i; 617fb2f18f8Sesaxe 618fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 619fb2f18f8Sesaxe 620fb2f18f8Sesaxe /* 621fb2f18f8Sesaxe * Notify all registered classes about the new CPU 622fb2f18f8Sesaxe */ 623fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 624fb2f18f8Sesaxe PG_CPU_ACTIVE(i, cp); 625fb2f18f8Sesaxe } 626fb2f18f8Sesaxe 627fb2f18f8Sesaxe /* 628fb2f18f8Sesaxe * This CPU is going inactive (offline) 629fb2f18f8Sesaxe * This routine may not block, as it is called from paused 630fb2f18f8Sesaxe * CPUs context. 631fb2f18f8Sesaxe */ 632fb2f18f8Sesaxe void 633fb2f18f8Sesaxe pg_cpu_inactive(cpu_t *cp) 634fb2f18f8Sesaxe { 635fb2f18f8Sesaxe pg_cid_t i; 636fb2f18f8Sesaxe 637fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 638fb2f18f8Sesaxe 639fb2f18f8Sesaxe /* 640fb2f18f8Sesaxe * Notify all registered classes about the new CPU 641fb2f18f8Sesaxe */ 642fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 643fb2f18f8Sesaxe PG_CPU_INACTIVE(i, cp); 644fb2f18f8Sesaxe } 645fb2f18f8Sesaxe 646fb2f18f8Sesaxe /* 647fb2f18f8Sesaxe * Invoked when the CPU is about to move into the partition 648fb2f18f8Sesaxe * This routine may block. 649fb2f18f8Sesaxe */ 650fb2f18f8Sesaxe void 651fb2f18f8Sesaxe pg_cpupart_in(cpu_t *cp, cpupart_t *pp) 652fb2f18f8Sesaxe { 653fb2f18f8Sesaxe int i; 654fb2f18f8Sesaxe 655fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 656fb2f18f8Sesaxe 657fb2f18f8Sesaxe /* 658fb2f18f8Sesaxe * Notify all registered classes that the 659fb2f18f8Sesaxe * CPU is about to enter the CPU partition 660fb2f18f8Sesaxe */ 661fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 662fb2f18f8Sesaxe PG_CPUPART_IN(i, cp, pp); 663fb2f18f8Sesaxe } 664fb2f18f8Sesaxe 665fb2f18f8Sesaxe /* 666fb2f18f8Sesaxe * Invoked when the CPU is about to move out of the partition 667fb2f18f8Sesaxe * This routine may block. 
668fb2f18f8Sesaxe */ 669fb2f18f8Sesaxe /*ARGSUSED*/ 670fb2f18f8Sesaxe void 671fb2f18f8Sesaxe pg_cpupart_out(cpu_t *cp, cpupart_t *pp) 672fb2f18f8Sesaxe { 673fb2f18f8Sesaxe int i; 674fb2f18f8Sesaxe 675fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 676fb2f18f8Sesaxe 677fb2f18f8Sesaxe /* 678fb2f18f8Sesaxe * Notify all registered classes that the 679fb2f18f8Sesaxe * CPU is about to leave the CPU partition 680fb2f18f8Sesaxe */ 681fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 682fb2f18f8Sesaxe PG_CPUPART_OUT(i, cp, pp); 683fb2f18f8Sesaxe } 684fb2f18f8Sesaxe 685fb2f18f8Sesaxe /* 686fb2f18f8Sesaxe * Invoked when the CPU is *moving* partitions. 687fb2f18f8Sesaxe * 688fb2f18f8Sesaxe * This routine may not block, as it is called from paused CPUs 689fb2f18f8Sesaxe * context. 690fb2f18f8Sesaxe */ 691fb2f18f8Sesaxe void 692fb2f18f8Sesaxe pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) 693fb2f18f8Sesaxe { 694fb2f18f8Sesaxe int i; 695fb2f18f8Sesaxe 696fb2f18f8Sesaxe ASSERT(MUTEX_HELD(&cpu_lock)); 697fb2f18f8Sesaxe 698fb2f18f8Sesaxe /* 699fb2f18f8Sesaxe * Notify all registered classes that the 700fb2f18f8Sesaxe * CPU is about to leave the CPU partition 701fb2f18f8Sesaxe */ 702fb2f18f8Sesaxe for (i = 0; i < pg_nclasses; i++) 703fb2f18f8Sesaxe PG_CPUPART_MOVE(i, cp, oldpp, newpp); 704fb2f18f8Sesaxe } 705fb2f18f8Sesaxe 706fb2f18f8Sesaxe /* 7070e751525SEric Saxe * Return a class specific string describing a policy implemented 7080e751525SEric Saxe * across this PG 7090e751525SEric Saxe */ 7100e751525SEric Saxe char * 7110e751525SEric Saxe pg_policy_name(pg_t *pg) 7120e751525SEric Saxe { 7130e751525SEric Saxe char *str; 7140e751525SEric Saxe if ((str = PG_POLICY_NAME(pg)) != NULL) 7150e751525SEric Saxe return (str); 7160e751525SEric Saxe 7170e751525SEric Saxe return ("N/A"); 7180e751525SEric Saxe } 7190e751525SEric Saxe 7200e751525SEric Saxe /* 721fb2f18f8Sesaxe * Provide the specified CPU a bootstrap pg 722fb2f18f8Sesaxe * This is needed to allow sane behaviour if any 
PG consuming 723fb2f18f8Sesaxe * code needs to deal with a partially initialized CPU 724fb2f18f8Sesaxe */ 725fb2f18f8Sesaxe void 726fb2f18f8Sesaxe pg_cpu_bootstrap(cpu_t *cp) 727fb2f18f8Sesaxe { 728fb2f18f8Sesaxe cp->cpu_pg = &bootstrap_pg_data; 729fb2f18f8Sesaxe } 730fb2f18f8Sesaxe 7311a77c24bSEric Saxe /* 7321a77c24bSEric Saxe * Return non-zero if the specified CPU is bootstrapped, 7331a77c24bSEric Saxe * which means it's CPU specific PG data has not yet been 7341a77c24bSEric Saxe * fully constructed. 7351a77c24bSEric Saxe */ 7361a77c24bSEric Saxe int 7371a77c24bSEric Saxe pg_cpu_is_bootstrapped(cpu_t *cp) 7381a77c24bSEric Saxe { 7391a77c24bSEric Saxe return (cp->cpu_pg == &bootstrap_pg_data); 7401a77c24bSEric Saxe } 7411a77c24bSEric Saxe 742fb2f18f8Sesaxe /*ARGSUSED*/ 743fb2f18f8Sesaxe static pg_t * 744fb2f18f8Sesaxe pg_alloc_default(pg_class_t class) 745fb2f18f8Sesaxe { 746fb2f18f8Sesaxe return (kmem_zalloc(sizeof (pg_t), KM_SLEEP)); 747fb2f18f8Sesaxe } 748fb2f18f8Sesaxe 749fb2f18f8Sesaxe /*ARGSUSED*/ 750fb2f18f8Sesaxe static void 751fb2f18f8Sesaxe pg_free_default(struct pg *pg) 752fb2f18f8Sesaxe { 753fb2f18f8Sesaxe kmem_free(pg, sizeof (pg_t)); 754fb2f18f8Sesaxe } 7550e751525SEric Saxe 7560e751525SEric Saxe static void 7570e751525SEric Saxe pg_null_op() 7580e751525SEric Saxe { 7590e751525SEric Saxe } 7600e751525SEric Saxe 7610e751525SEric Saxe /* 7620e751525SEric Saxe * Invoke the "thread switch" callback for each of the CPU's PGs 7630e751525SEric Saxe * This is invoked from the dispatcher swtch() routine, which is called 7640e751525SEric Saxe * when a thread running an a CPU should switch to another thread. 7650e751525SEric Saxe * "cp" is the CPU on which the thread switch is happening 7660e751525SEric Saxe * "now" is an unscaled hrtime_t timestamp taken in swtch() 7670e751525SEric Saxe * "old" and "new" are the outgoing and incoming threads, respectively. 
7680e751525SEric Saxe */ 7690e751525SEric Saxe void 7700e751525SEric Saxe pg_ev_thread_swtch(struct cpu *cp, hrtime_t now, kthread_t *old, kthread_t *new) 7710e751525SEric Saxe { 7720e751525SEric Saxe int i, sz; 7730e751525SEric Saxe group_t *grp; 7740e751525SEric Saxe pg_t *pg; 7750e751525SEric Saxe 7760e751525SEric Saxe grp = &cp->cpu_pg->pgs; 7770e751525SEric Saxe sz = GROUP_SIZE(grp); 7780e751525SEric Saxe for (i = 0; i < sz; i++) { 7790e751525SEric Saxe pg = GROUP_ACCESS(grp, i); 7800e751525SEric Saxe pg->pg_cb.thread_swtch(pg, cp, now, old, new); 7810e751525SEric Saxe } 7820e751525SEric Saxe } 7830e751525SEric Saxe 7840e751525SEric Saxe /* 7850e751525SEric Saxe * Invoke the "thread remain" callback for each of the CPU's PGs. 7860e751525SEric Saxe * This is called from the dispatcher's swtch() routine when a thread 7870e751525SEric Saxe * running on the CPU "cp" is switching to itself, which can happen as an 7880e751525SEric Saxe * artifact of the thread's timeslice expiring. 7890e751525SEric Saxe */ 7900e751525SEric Saxe void 7910e751525SEric Saxe pg_ev_thread_remain(struct cpu *cp, kthread_t *t) 7920e751525SEric Saxe { 7930e751525SEric Saxe int i, sz; 7940e751525SEric Saxe group_t *grp; 7950e751525SEric Saxe pg_t *pg; 7960e751525SEric Saxe 7970e751525SEric Saxe grp = &cp->cpu_pg->pgs; 7980e751525SEric Saxe sz = GROUP_SIZE(grp); 7990e751525SEric Saxe for (i = 0; i < sz; i++) { 8000e751525SEric Saxe pg = GROUP_ACCESS(grp, i); 8010e751525SEric Saxe pg->pg_cb.thread_remain(pg, cp, t); 8020e751525SEric Saxe } 8030e751525SEric Saxe } 804