/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/processor.h>
#include <sys/disp.h>
#include <sys/group.h>
#include <sys/pghw.h>
#include <sys/bitset.h>
#include <sys/lgrp.h>
#include <sys/cmt.h>

/*
 * CMT scheduler / dispatcher support
 *
 * This file implements CMT scheduler support using Processor Groups.
 * The CMT processor group class creates and maintains the CMT class
 * specific processor group pg_cmt_t.
 *
 * ----------------------------  <-- pg_cmt_t *
 * | pghw_t                    |
 * ----------------------------
 * | CMT class specific data   |
 * | - hierarchy linkage       |
 * | - CMT load balancing data |
 * | - active CPU group/bitset |
 * ----------------------------
 *
 * The scheduler/dispatcher leverages knowledge of the performance
 * relevant CMT sharing relationships existing between cpus to implement
 * optimized affinity and load balancing policies.
 *
 * Load balancing policy seeks to improve performance by minimizing
 * contention over shared processor resources / facilities, while the
 * affinity policies seek to improve cache and TLB utilization.
 *
 * The CMT PGs created by this class are already arranged into a
 * hierarchy (which is done in the pghw layer).
 * To implement the top-down CMT load balancing algorithm, the CMT PGs
 * additionally maintain parent, child and sibling hierarchy
 * relationships. Parent PGs always contain a superset of their
 * children's resources, each PG can have at most one parent, and
 * siblings are the group of PGs sharing the same parent.
 *
 * The CMT load balancing algorithm balances across the CMT PGs within
 * their respective lgroups. On UMA based systems, there exists a single
 * top level group of PGs to balance across. On NUMA systems, multiple
 * top level groups are instantiated, and top level balancing begins by
 * balancing across the CMT PGs within each (per lgroup) top level
 * group.
 */

typedef struct cmt_lgrp {
        group_t         cl_pgs;         /* Top level group of active CMT PGs */
        int             cl_npgs;        /* # of top level PGs in the lgroup */
        lgrp_handle_t   cl_hand;        /* lgroup's platform handle */
        struct cmt_lgrp *cl_next;       /* next cmt_lgrp */
} cmt_lgrp_t;

static cmt_lgrp_t       *cmt_lgrps = NULL;

static int              is_cpu0 = 1;
static int              cmt_sched_disabled = 0;

static pg_cid_t         pg_cmt_class_id;        /* PG class id */

static pg_t             *pg_cmt_alloc();
static void             pg_cmt_free(pg_t *);
static void             pg_cmt_cpu_init(cpu_t *);
static void             pg_cmt_cpu_fini(cpu_t *);
static void             pg_cmt_cpu_active(cpu_t *);
static void             pg_cmt_cpu_inactive(cpu_t *);
static void             pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
static void             pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
static void             pg_cmt_hier_pack(pg_cmt_t **, int);
static int              pg_cmt_cpu_belongs(pg_t *, cpu_t *);
static int              pg_cmt_hw(pghw_type_t);
static cmt_lgrp_t       *pg_cmt_find_lgrp(lgrp_handle_t);

/*
 * Macro to test if PG is managed by the CMT PG class
 */
#define IS_CMT_PG(pg)   (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)

/*
 * CMT PG ops
 */
struct pg_ops pg_ops_cmt = {
        pg_cmt_alloc,
        pg_cmt_free,
        pg_cmt_cpu_init,
        pg_cmt_cpu_fini,
        pg_cmt_cpu_active,
        pg_cmt_cpu_inactive,
        pg_cmt_cpupart_in,
        NULL,                   /* cpupart_out */
        pg_cmt_cpupart_move,
        pg_cmt_cpu_belongs,
};

/*
 * Initialize the CMT PG class
 */
void
pg_cmt_class_init(void)
{
        if (cmt_sched_disabled)
                return;

        pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
}

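/*
 * Example (a hypothetical topology, not one mandated by this file):
 * on a system where each CPU shares a physical chip (PGHW_CHIP) and an
 * execution pipeline (PGHW_IPIPE) with other CPUs, pg_cmt_cpu_init()
 * below would leave each CPU with a two level load balancing lineage:
 *
 *      chip PG         <- cmt_parent == NULL; its siblings are the
 *          |              lgroup's top level cl_pgs group
 *      ipipe PG        <- leaf; cpu_pg->cmt_lineage points here
 *
 * pg_cmt_load() walks such a lineage from leaf to root via cmt_parent,
 * and the dispatcher balances top down across each PG's cmt_siblings
 * group.
 */
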
/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
void
pg_cmt_cpu_startup(cpu_t *cp)
{
        PG_NRUN_UPDATE(cp, 1);
}

/*
 * Adjust the CMT load in the CMT PGs in which the CPU belongs
 * Note that "n" can be positive in the case of increasing
 * load, or negative in the case of decreasing load.
 */
void
pg_cmt_load(cpu_t *cp, int n)
{
        pg_cmt_t        *pg;

        pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
        while (pg != NULL) {

                ASSERT(IS_CMT_PG(pg));

                atomic_add_32(&pg->cmt_nrunning, n);
                pg = pg->cmt_parent;
        }
}

/*
 * Return non-zero if thread can migrate between "from" and "to"
 * without a performance penalty
 */
int
pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
{
        if (from->cpu_physid->cpu_cacheid ==
            to->cpu_physid->cpu_cacheid)
                return (1);
        return (0);
}

/*
 * CMT class specific PG allocation
 */
static pg_t *
pg_cmt_alloc(void)
{
        return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
}

/*
 * Class specific PG de-allocation
 */
static void
pg_cmt_free(pg_t *pg)
{
        ASSERT(pg != NULL);
        ASSERT(IS_CMT_PG(pg));

        kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
}

/*
 * Return 1 if CMT load balancing policies should be
 * implemented across instances of the specified hardware
 * sharing relationship.
 */
static int
pg_cmt_load_bal_hw(pghw_type_t hw)
{
        if (hw == PGHW_IPIPE ||
            hw == PGHW_FPU ||
            hw == PGHW_CHIP)
                return (1);
        else
                return (0);
}

/*
 * Return 1 if thread affinity policies should be implemented
 * for instances of the specified hardware sharing relationship.
 */
static int
pg_cmt_affinity_hw(pghw_type_t hw)
{
        if (hw == PGHW_CACHE)
                return (1);
        else
                return (0);
}

/*
 * Return 1 if CMT scheduling policies should be implemented
 * for the specified hardware sharing relationship.
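 * This is the union of the load balancing and affinity predicates
 * above: a relationship is interesting to the CMT class if either
 * policy applies to it.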
 */
static int
pg_cmt_hw(pghw_type_t hw)
{
        return (pg_cmt_load_bal_hw(hw) ||
            pg_cmt_affinity_hw(hw));
}

/*
 * CMT class callback for a new CPU entering the system
 */
static void
pg_cmt_cpu_init(cpu_t *cp)
{
        pg_cmt_t        *pg;
        group_t         *cmt_pgs;
        int             level, max_level, nlevels;
        pghw_type_t     hw;
        pg_t            *pg_cache = NULL;
        pg_cmt_t        *cpu_cmt_hier[PGHW_NUM_COMPONENTS];
        lgrp_handle_t   lgrp_handle;
        cmt_lgrp_t      *lgrp;

        ASSERT(MUTEX_HELD(&cpu_lock));

        /*
         * A new CPU is coming into the system.
         * Interrogate the platform to see if the CPU
         * has any performance relevant CMT sharing
         * relationships.
         */
        cmt_pgs = &cp->cpu_pg->cmt_pgs;
        cp->cpu_pg->cmt_lineage = NULL;

        bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
        max_level = nlevels = 0;
        for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {

                /*
                 * We're only interested in CMT hw sharing relationships
                 */
                if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0)
                        continue;

                /*
                 * Find (or create) the PG associated with
                 * the hw sharing relationship in which cp
                 * belongs.
                 *
                 * Determine if a suitable PG already
                 * exists, or if one needs to be created.
                 */
                pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
                if (pg == NULL) {
                        /*
                         * Create a new one.
                         * Initialize the common...
                         */
                        pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);

                        /* ... physical ... */
                        pghw_init((pghw_t *)pg, cp, hw);

                        /*
                         * ... and CMT specific portions of the
                         * structure.
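                         * The active CPU group is sized alongside the
                         * PG's CPU group, and the bitset is indexed by
                         * cpu_seqid (see the expand/resize calls below).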
                         */
                        bitset_init(&pg->cmt_cpus_actv_set);
                        group_create(&pg->cmt_cpus_actv);
                } else {
                        ASSERT(IS_CMT_PG(pg));
                }

                /* Add the CPU to the PG */
                pg_cpu_add((pg_t *)pg, cp);

                /*
                 * Ensure capacity of the active CPUs group/bitset
                 */
                group_expand(&pg->cmt_cpus_actv,
                    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

                if (cp->cpu_seqid >=
                    bitset_capacity(&pg->cmt_cpus_actv_set)) {
                        bitset_resize(&pg->cmt_cpus_actv_set,
                            cp->cpu_seqid + 1);
                }

                /*
                 * Build a lineage of CMT PGs for load balancing
                 */
                if (pg_cmt_load_bal_hw(hw)) {
                        level = pghw_level(hw);
                        cpu_cmt_hier[level] = pg;
                        if (level > max_level)
                                max_level = level;
                        nlevels++;
                }

                /* Cache this for later */
                if (hw == PGHW_CACHE)
                        pg_cache = (pg_t *)pg;
        }

        /*
         * Pack out any gaps in the constructed lineage.
         * Gaps may exist where the architecture knows
         * about a hardware sharing relationship, but such a
         * relationship either isn't relevant for load
         * balancing or doesn't exist between CPUs on the system.
         */
        pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1);

        /*
         * For each of the PGs in the CPU's lineage:
         *      - Add an entry in the CPU sorted CMT PG group
         *        which is used for top down CMT load balancing
         *      - Tie the PG into the CMT hierarchy by connecting
         *        it to its parent and siblings.
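         *
         * The cmt_pgs group ends up ordered top down: group_add_at()
         * places each PG at index (nlevels - level - 1), so entry 0 is
         * the root of the lineage, while cmt_lineage points at the
         * leaf (level 0) PG.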
         */
        group_expand(cmt_pgs, nlevels);

        /*
         * Find the lgrp that encapsulates this CPU's CMT hierarchy
         */
        lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
        lgrp = pg_cmt_find_lgrp(lgrp_handle);

        for (level = 0; level < nlevels; level++) {
                uint_t          children;
                int             err;

                pg = cpu_cmt_hier[level];
                err = group_add_at(cmt_pgs, pg, nlevels - level - 1);
                ASSERT(err == 0);

                if (level == 0)
                        cp->cpu_pg->cmt_lineage = (pg_t *)pg;

                if (pg->cmt_siblings != NULL) {
                        /* Already initialized */
                        ASSERT(pg->cmt_parent == NULL ||
                            pg->cmt_parent == cpu_cmt_hier[level + 1]);
                        ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
                            pg->cmt_siblings == pg->cmt_parent->cmt_children);
                        continue;
                }

                if ((level + 1) == nlevels) {
                        pg->cmt_parent = NULL;
                        pg->cmt_siblings = &lgrp->cl_pgs;
                        children = ++lgrp->cl_npgs;
                } else {
                        pg->cmt_parent = cpu_cmt_hier[level + 1];

                        /*
                         * A good parent keeps track of their children.
                         * The parent's children group is also the PG's
                         * siblings.
                         */
                        if (pg->cmt_parent->cmt_children == NULL) {
                                pg->cmt_parent->cmt_children =
                                    kmem_zalloc(sizeof (group_t), KM_SLEEP);
                                group_create(pg->cmt_parent->cmt_children);
                        }
                        pg->cmt_siblings = pg->cmt_parent->cmt_children;
                        children = ++pg->cmt_parent->cmt_nchildren;
                }
                pg->cmt_hint = 0;
                group_expand(pg->cmt_siblings, children);
        }

        /*
         * Cache the chip and core IDs in the cpu_t->cpu_physid structure
         * for fast lookups later.
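         * cpu_cacheid, in particular, is what pg_cmt_can_migrate()
         * compares to decide whether two CPUs can exchange a thread
         * without a performance penalty.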
         */
        if (cp->cpu_physid) {
                cp->cpu_physid->cpu_chipid =
                    pg_plat_hw_instance_id(cp, PGHW_CHIP);
                cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);

                /*
                 * If this cpu has a PG representing shared cache, then set
                 * cpu_cacheid to that PG's logical id
                 */
                if (pg_cache)
                        cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
        }

        /* CPU0 only initialization */
        if (is_cpu0) {
                pg_cmt_cpu_startup(cp);
                is_cpu0 = 0;
        }

}

/*
 * Class callback when a CPU is leaving the system (deletion)
 */
static void
pg_cmt_cpu_fini(cpu_t *cp)
{
        group_iter_t    i;
        pg_cmt_t        *pg;
        group_t         *pgs, *cmt_pgs;
        lgrp_handle_t   lgrp_handle;
        cmt_lgrp_t      *lgrp;

        pgs = &cp->cpu_pg->pgs;
        cmt_pgs = &cp->cpu_pg->cmt_pgs;

        /*
         * Find the lgroup that encapsulates this CPU's CMT hierarchy
         */
        lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
        lgrp = pg_cmt_find_lgrp(lgrp_handle);

        /*
         * First, clean up anything load balancing specific for each of
         * the CPU's PGs that participated in CMT load balancing
         */
        pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
        while (pg != NULL) {

                /*
                 * Remove the PG from the CPU's load balancing lineage
                 */
                (void) group_remove(cmt_pgs, pg, GRP_RESIZE);

                /*
                 * If it's about to become empty, destroy its children
                 * group, and remove its reference from its siblings.
                 * This is done here (rather than below) to avoid removing
                 * our reference from a PG that we just eliminated.
                 */
                if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
                        if (pg->cmt_children != NULL)
                                group_destroy(pg->cmt_children);
                        if (pg->cmt_siblings != NULL) {
                                if (pg->cmt_siblings == &lgrp->cl_pgs)
                                        lgrp->cl_npgs--;
                                else
                                        pg->cmt_parent->cmt_nchildren--;
                        }
                }
                pg = pg->cmt_parent;
        }

        ASSERT(GROUP_SIZE(cmt_pgs) == 0);

        /*
         * Now that the load balancing lineage updates have happened,
         * remove the CPU from all its PGs (destroying any that become
         * empty).
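         *
         * Teardown mirrors construction in pg_cmt_cpu_init(): the
         * active CPU group and bitset are destroyed before the pghw
         * and common PG portions are torn down.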
         */
        group_iter_init(&i);
        while ((pg = group_iterate(pgs, &i)) != NULL) {
                if (IS_CMT_PG(pg) == 0)
                        continue;

                pg_cpu_delete((pg_t *)pg, cp);
                /*
                 * Deleting the CPU from the PG changes the CPU's
                 * PG group over which we are actively iterating
                 * Re-initialize the iteration
                 */
                group_iter_init(&i);

                if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {

                        /*
                         * The PG has become zero sized, so destroy it.
                         */
                        group_destroy(&pg->cmt_cpus_actv);
                        bitset_fini(&pg->cmt_cpus_actv_set);
                        pghw_fini((pghw_t *)pg);

                        pg_destroy((pg_t *)pg);
                }
        }
}

/*
 * Class callback when a CPU is entering a cpu partition
 */
static void
pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
{
        group_t         *pgs;
        pg_t            *pg;
        group_iter_t    i;

        ASSERT(MUTEX_HELD(&cpu_lock));

        pgs = &cp->cpu_pg->pgs;

        /*
         * Ensure that the new partition's PG bitset
         * is large enough for all CMT PGs to which cp
         * belongs
         */
        group_iter_init(&i);
        while ((pg = group_iterate(pgs, &i)) != NULL) {
                if (IS_CMT_PG(pg) == 0)
                        continue;

                if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
                        bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
        }
}

/*
 * Class callback when a CPU is actually moving partitions
 */
static void
pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
{
        cpu_t           *cpp;
        group_t         *pgs;
        pg_t            *pg;
        group_iter_t    pg_iter;
        pg_cpu_itr_t    cpu_iter;
        boolean_t       found;

        ASSERT(MUTEX_HELD(&cpu_lock));

        pgs = &cp->cpu_pg->pgs;
        group_iter_init(&pg_iter);

        /*
         * Iterate over the CPU's CMT PGs
         */
        while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {

                if (IS_CMT_PG(pg) == 0)
                        continue;

                /*
                 * Add the PG to the bitset in the new partition.
                 */
                bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);

                /*
                 * Remove the PG from the bitset in the old partition
                 * if the last of the PG's CPUs have left.
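                 * Note the asymmetry: the PG is added to the new
                 * partition's bitset unconditionally above, but removal
                 * here requires scanning the PG for a CPU (other than
                 * cp) still running in the old partition.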
                 */
                found = B_FALSE;
                PG_CPU_ITR_INIT(pg, cpu_iter);
                while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
                        if (cpp == cp)
                                continue;
                        if (cpp->cpu_part->cp_id == oldpp->cp_id) {
                                found = B_TRUE;
                                break;
                        }
                }
                if (!found)
                        bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
        }
}

/*
 * Class callback when a CPU becomes active (online)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_active(cpu_t *cp)
{
        int             err;
        group_iter_t    i;
        pg_cmt_t        *pg;
        group_t         *pgs;

        ASSERT(MUTEX_HELD(&cpu_lock));

        pgs = &cp->cpu_pg->pgs;
        group_iter_init(&i);

        /*
         * Iterate over the CPU's PGs
         */
        while ((pg = group_iterate(pgs, &i)) != NULL) {

                if (IS_CMT_PG(pg) == 0)
                        continue;

                err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
                ASSERT(err == 0);

                /*
                 * If this is the first active CPU in the PG, and it
                 * represents a hardware sharing relationship over which
                 * CMT load balancing is performed, add it as a candidate
                 * for balancing with its siblings.
                 */
                if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
                    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
                        err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
                        ASSERT(err == 0);
                }

                /*
                 * Notate the CPU in the PG's active CPU bitset.
                 * Also notate the PG as being active in its associated
                 * partition
                 */
                bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
                bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
        }
}

/*
 * Class callback when a CPU goes inactive (offline)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_inactive(cpu_t *cp)
{
        int             err;
        group_t         *pgs;
        pg_cmt_t        *pg;
        cpu_t           *cpp;
        group_iter_t    i;
        pg_cpu_itr_t    cpu_itr;
        boolean_t       found;

        ASSERT(MUTEX_HELD(&cpu_lock));

        pgs = &cp->cpu_pg->pgs;
        group_iter_init(&i);

        while ((pg = group_iterate(pgs, &i)) != NULL) {

                if (IS_CMT_PG(pg) == 0)
                        continue;

                /*
                 * Remove the CPU from the CMT PG's active CPU group
                 * and bitset
                 */
                err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
                ASSERT(err == 0);

                bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);

                /*
                 * If there are no more active CPUs in this PG over which
                 * load was balanced, remove it as a balancing candidate.
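                 * This undoes the group_add() performed by
                 * pg_cmt_cpu_active() when the PG's first CPU came
                 * online.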
                 */
                if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
                    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
                        err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
                        ASSERT(err == 0);
                }

                /*
                 * Assert the number of active CPUs does not exceed
                 * the total number of CPUs in the PG
                 */
                ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
                    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

                /*
                 * Update the PG bitset in the CPU's old partition
                 */
                found = B_FALSE;
                PG_CPU_ITR_INIT(pg, cpu_itr);
                while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
                        if (cpp == cp)
                                continue;
                        if (cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
                                found = B_TRUE;
                                break;
                        }
                }
                if (!found) {
                        bitset_del(&cp->cpu_part->cp_cmt_pgs,
                            ((pg_t *)pg)->pg_id);
                }
        }
}

/*
 * Return non-zero if the CPU belongs in the given PG
 */
static int
pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
{
        cpu_t   *pg_cpu;

        pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);

        ASSERT(pg_cpu != NULL);

        /*
         * The CPU belongs if, given the nature of the hardware sharing
         * relationship represented by the PG, the CPU has that
         * relationship with some other CPU already in the PG
         */
        if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
                return (1);

        return (0);
}

/*
 * Pack the CPU's CMT hierarchy, sliding non-NULL entries down to close
 * any gaps. The hierarchy order is preserved (e.g. {A, NULL, C}
 * becomes {A, C, NULL}).
 */
static void
pg_cmt_hier_pack(pg_cmt_t *hier[], int sz)
{
        int     i, j;

        for (i = 0; i < sz; i++) {
                if (hier[i] != NULL)
                        continue;

                for (j = i; j < sz; j++) {
                        if (hier[j] != NULL) {
                                hier[i] = hier[j];
                                hier[j] = NULL;
                                break;
                        }
                }
                if (j == sz)
                        break;
        }
}

/*
 * Return a cmt_lgrp_t * given an lgroup handle.
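 * (Called with cpu_lock held; the KM_SLEEP allocation below may block.)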
 * If the right one doesn't yet exist, create one
 * by adding it to the head of the cmt_lgrps list
 */
static cmt_lgrp_t *
pg_cmt_find_lgrp(lgrp_handle_t hand)
{
        cmt_lgrp_t      *lgrp;

        ASSERT(MUTEX_HELD(&cpu_lock));

        lgrp = cmt_lgrps;
        while (lgrp != NULL) {
                if (lgrp->cl_hand == hand)
                        return (lgrp);
                lgrp = lgrp->cl_next;
        }

        /*
         * Haven't seen this lgrp yet
         */
        lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);

        lgrp->cl_hand = hand;
        lgrp->cl_npgs = 0;
        lgrp->cl_next = cmt_lgrps;
        cmt_lgrps = lgrp;
        group_create(&lgrp->cl_pgs);

        return (lgrp);
}