/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/processor.h>
#include <sys/disp.h>
#include <sys/group.h>
#include <sys/pghw.h>
#include <sys/bitset.h>
#include <sys/lgrp.h>
#include <sys/cmt.h>

/*
 * CMT scheduler / dispatcher support
 *
 * This file implements CMT scheduler support using Processor Groups.
 * The CMT processor group class creates and maintains the CMT class
 * specific processor group pg_cmt_t.
 *
 * ---------------------------- <-- pg_cmt_t *
 * | pghw_t                   |
 * ----------------------------
 * | CMT class specific data  |
 * | - hierarchy linkage      |
 * | - CMT load balancing data|
 * | - active CPU group/bitset|
 * ----------------------------
 *
 * The scheduler/dispatcher leverages knowledge of the performance
 * relevant CMT sharing relationships existing between cpus to implement
 * optimized affinity and load balancing policies.
 *
 * Load balancing policy seeks to improve performance by minimizing
 * contention over shared processor resources / facilities, while the
 * affinity policies seek to improve cache and TLB utilization.
 *
 * The CMT PGs created by this class are already arranged into a
 * hierarchy (which is done in the pghw layer). To implement the top-down
 * CMT load balancing algorithm, the CMT PGs additionally maintain
 * parent, child and sibling hierarchy relationships.
 * Parent PGs always contain a superset of their children's resources,
 * each PG can have at most one parent, and siblings are the group of PGs
 * sharing the same parent.
 *
 * On NUMA systems, the CMT load balancing algorithm balances across the
 * CMT PGs within their respective lgroups. On UMA based systems, there
 * exists a single top level group of PGs to balance across. On NUMA
 * systems, multiple top level groups are instantiated, and top level
 * balancing begins by balancing across the CMT PGs within their
 * respective (per lgroup) top level groups.
 */
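
/*
 * Illustrative example (a hypothetical configuration, not taken from any
 * particular platform): consider a chip with two cores, where each core
 * has its own integer pipeline and the cores share an FPU and a cache.
 * Each CPU's lineage of load balancing CMT PGs might then look like:
 *
 *	IPIPE PG (per core) --> FPU PG (per chip) --> CHIP PG (top level)
 *	  level 0, cmt_lineage       parent             no parent; lives in
 *	                                                the lgroup's cl_pgs
 *
 * The shared cache would be represented by a CACHE PG, which participates
 * in the affinity policy rather than load balancing. Which levels actually
 * exist, and their ordering, are platform dependent.
 */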

typedef struct cmt_lgrp {
	group_t		cl_pgs;		/* Top level group of active CMT PGs */
	int		cl_npgs;	/* # of top level PGs in the lgroup */
	lgrp_handle_t	cl_hand;	/* lgroup's platform handle */
	struct cmt_lgrp	*cl_next;	/* next cmt_lgrp */
} cmt_lgrp_t;

static cmt_lgrp_t	*cmt_lgrps = NULL;	/* cmt_lgrps list head */
static cmt_lgrp_t	*cpu0_lgrp = NULL;	/* boot CPU's initial lgrp */
						/* used for null_proc_lpa */

static int		is_cpu0 = 1;	/* true if this is boot CPU context */

/*
 * Set this to non-zero to disable CMT scheduling
 * This must be done via kmdb -d, as /etc/system will be too late
 */
static int		cmt_sched_disabled = 0;

static pg_cid_t		pg_cmt_class_id;	/* PG class id */

static pg_t		*pg_cmt_alloc();
static void		pg_cmt_free(pg_t *);
static void		pg_cmt_cpu_init(cpu_t *);
static void		pg_cmt_cpu_fini(cpu_t *);
static void		pg_cmt_cpu_active(cpu_t *);
static void		pg_cmt_cpu_inactive(cpu_t *);
static void		pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
static void		pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
static void		pg_cmt_hier_pack(pg_cmt_t **, int);
static int		pg_cmt_cpu_belongs(pg_t *, cpu_t *);
static int		pg_cmt_hw(pghw_type_t);
static cmt_lgrp_t	*pg_cmt_find_lgrp(lgrp_handle_t);
static cmt_lgrp_t	*pg_cmt_lgrp_create(lgrp_handle_t);

/*
 * Macro to test if PG is managed by the CMT PG class
 */
#define	IS_CMT_PG(pg)	(((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)

/*
 * CMT PG ops
 */
struct pg_ops pg_ops_cmt = {
	pg_cmt_alloc,
	pg_cmt_free,
	pg_cmt_cpu_init,
	pg_cmt_cpu_fini,
	pg_cmt_cpu_active,
	pg_cmt_cpu_inactive,
	pg_cmt_cpupart_in,
	NULL,			/* cpupart_out */
	pg_cmt_cpupart_move,
	pg_cmt_cpu_belongs,
};

/*
 * Initialize the CMT PG class
 */
void
pg_cmt_class_init(void)
{
	if (cmt_sched_disabled)
		return;

	pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
}

/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
void
pg_cmt_cpu_startup(cpu_t *cp)
{
	PG_NRUN_UPDATE(cp, 1);
}

/*
 * Adjust the CMT load in the CMT PGs in which the CPU belongs
 * Note that "n" can be positive in the case of increasing
 * load, or negative in the case of decreasing load.
 */
void
pg_cmt_load(cpu_t *cp, int n)
{
	pg_cmt_t	*pg;

	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
	while (pg != NULL) {
		ASSERT(IS_CMT_PG(pg));
		atomic_add_32(&pg->cmt_nrunning, n);
		pg = pg->cmt_parent;
	}
}
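
/*
 * For illustration: pg_cmt_load() propagates a load delta up the CPU's
 * entire lineage. Assuming the hypothetical lineage sketched in the block
 * comment above, a single pg_cmt_load(cp, 1) call walks:
 *
 *	pg = cp->cpu_pg->cmt_lineage;	core's IPIPE PG, cmt_nrunning++
 *	pg = pg->cmt_parent;		chip's FPU PG, cmt_nrunning++
 *	pg = pg->cmt_parent;		CHIP PG, cmt_nrunning++
 *	pg = pg->cmt_parent;		NULL, walk ends
 *
 * and a later pg_cmt_load(cp, -1) backs each of those counts out again.
 */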

/*
 * Return non-zero if thread can migrate between "from" and "to"
 * without a performance penalty
 */
int
pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
{
	if (from->cpu_physid->cpu_cacheid ==
	    to->cpu_physid->cpu_cacheid)
		return (1);
	return (0);
}

/*
 * CMT class specific PG allocation
 */
static pg_t *
pg_cmt_alloc(void)
{
	return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
}

/*
 * Class specific PG de-allocation
 */
static void
pg_cmt_free(pg_t *pg)
{
	ASSERT(pg != NULL);
	ASSERT(IS_CMT_PG(pg));

	kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
}

/*
 * Return 1 if CMT load balancing policies should be
 * implemented across instances of the specified hardware
 * sharing relationship.
 */
static int
pg_cmt_load_bal_hw(pghw_type_t hw)
{
	if (hw == PGHW_IPIPE ||
	    hw == PGHW_FPU ||
	    hw == PGHW_CHIP)
		return (1);
	else
		return (0);
}

/*
 * Return 1 if thread affinity policies should be implemented
 * for instances of the specified hardware sharing relationship.
 */
static int
pg_cmt_affinity_hw(pghw_type_t hw)
{
	if (hw == PGHW_CACHE)
		return (1);
	else
		return (0);
}

/*
 * Return 1 if CMT scheduling policies should be implemented
 * for the specified hardware sharing relationship.
 */
static int
pg_cmt_hw(pghw_type_t hw)
{
	return (pg_cmt_load_bal_hw(hw) ||
	    pg_cmt_affinity_hw(hw));
}
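
/*
 * Summary of the mapping implemented by the three predicates above, for
 * the hardware sharing relationships this class currently considers:
 *
 *	PGHW_IPIPE, PGHW_FPU, PGHW_CHIP	- CMT load balancing policy
 *	PGHW_CACHE			- thread affinity policy
 *
 * A sharing relationship participates in CMT scheduling (pg_cmt_hw())
 * if either policy applies to it.
 */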

/*
 * CMT class callback for a new CPU entering the system
 */
static void
pg_cmt_cpu_init(cpu_t *cp)
{
	pg_cmt_t	*pg;
	group_t		*cmt_pgs;
	int		level, max_level, nlevels;
	pghw_type_t	hw;
	pg_t		*pg_cache = NULL;
	pg_cmt_t	*cpu_cmt_hier[PGHW_NUM_COMPONENTS];
	lgrp_handle_t	lgrp_handle;
	cmt_lgrp_t	*lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * A new CPU is coming into the system.
	 * Interrogate the platform to see if the CPU
	 * has any performance relevant CMT sharing
	 * relationships
	 */
	cmt_pgs = &cp->cpu_pg->cmt_pgs;
	cp->cpu_pg->cmt_lineage = NULL;

	bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
	max_level = nlevels = 0;
	for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {

		/*
		 * We're only interested in CMT hw sharing relationships
		 */
		if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0)
			continue;

		/*
		 * Find (or create) the PG associated with
		 * the hw sharing relationship in which cp
		 * belongs.
		 *
		 * Determine if a suitable PG already
		 * exists, or if one needs to be created.
		 */
		pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
		if (pg == NULL) {
			/*
			 * Create a new one.
			 * Initialize the common...
			 */
			pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);

			/* ... physical ... */
			pghw_init((pghw_t *)pg, cp, hw);

			/*
			 * ... and CMT specific portions of the
			 * structure.
			 */
			bitset_init(&pg->cmt_cpus_actv_set);
			group_create(&pg->cmt_cpus_actv);
		} else {
			ASSERT(IS_CMT_PG(pg));
		}

		/* Add the CPU to the PG */
		pg_cpu_add((pg_t *)pg, cp);

		/*
		 * Ensure capacity of the active CPUs group/bitset
		 */
		group_expand(&pg->cmt_cpus_actv,
		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

		if (cp->cpu_seqid >=
		    bitset_capacity(&pg->cmt_cpus_actv_set)) {
			bitset_resize(&pg->cmt_cpus_actv_set,
			    cp->cpu_seqid + 1);
		}

		/*
		 * Build a lineage of CMT PGs for load balancing
		 */
		if (pg_cmt_load_bal_hw(hw)) {
			level = pghw_level(hw);
			cpu_cmt_hier[level] = pg;
			if (level > max_level)
				max_level = level;
			nlevels++;
		}

		/* Cache this for later */
		if (hw == PGHW_CACHE)
			pg_cache = (pg_t *)pg;
	}

	/*
	 * Pack out any gaps in the constructed lineage.
	 * Gaps may exist where the architecture knows
	 * about a hardware sharing relationship, but such a
	 * relationship either isn't relevant for load
	 * balancing or doesn't exist between CPUs on the system.
	 */
	pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1);

	/*
	 * For each of the PGs in the CPU's lineage:
	 *	- Add an entry in the CPU sorted CMT PG group
	 *	  which is used for top down CMT load balancing
	 *	- Tie the PG into the CMT hierarchy by connecting
	 *	  it to its parent and siblings.
	 */
	group_expand(cmt_pgs, nlevels);

	/*
	 * Find the lgrp that encapsulates this CPU's CMT hierarchy
	 */
	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
	lgrp = pg_cmt_find_lgrp(lgrp_handle);
	if (lgrp == NULL)
		lgrp = pg_cmt_lgrp_create(lgrp_handle);

	for (level = 0; level < nlevels; level++) {
		uint_t		children;
		int		err;

		pg = cpu_cmt_hier[level];
		err = group_add_at(cmt_pgs, pg, nlevels - level - 1);
		ASSERT(err == 0);

		if (level == 0)
			cp->cpu_pg->cmt_lineage = (pg_t *)pg;

		if (pg->cmt_siblings != NULL) {
			/* Already initialized */
			ASSERT(pg->cmt_parent == NULL ||
			    pg->cmt_parent == cpu_cmt_hier[level + 1]);
			ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
			    pg->cmt_siblings == pg->cmt_parent->cmt_children);
			continue;
		}

		if ((level + 1) == nlevels) {
			pg->cmt_parent = NULL;
			pg->cmt_siblings = &lgrp->cl_pgs;
			children = ++lgrp->cl_npgs;
		} else {
			pg->cmt_parent = cpu_cmt_hier[level + 1];

			/*
			 * A good parent keeps track of their children.
			 * The parent's children group is also the PG's
			 * siblings.
			 */
			if (pg->cmt_parent->cmt_children == NULL) {
				pg->cmt_parent->cmt_children =
				    kmem_zalloc(sizeof (group_t), KM_SLEEP);
				group_create(pg->cmt_parent->cmt_children);
			}
			pg->cmt_siblings = pg->cmt_parent->cmt_children;
			children = ++pg->cmt_parent->cmt_nchildren;
		}
		pg->cmt_hint = 0;
		group_expand(pg->cmt_siblings, children);
	}

	/*
	 * Cache the chip and core IDs in the cpu_t->cpu_physid structure
	 * for fast lookups later.
	 */
	if (cp->cpu_physid) {
		cp->cpu_physid->cpu_chipid =
		    pg_plat_hw_instance_id(cp, PGHW_CHIP);
		cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);

		/*
		 * If this cpu has a PG representing shared cache, then set
		 * cpu_cacheid to that PG's logical id
		 */
		if (pg_cache)
			cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
	}

	/* CPU0 only initialization */
	if (is_cpu0) {
		pg_cmt_cpu_startup(cp);
		is_cpu0 = 0;
		cpu0_lgrp = lgrp;
	}

}

/*
 * Class callback when a CPU is leaving the system (deletion)
 */
static void
pg_cmt_cpu_fini(cpu_t *cp)
{
	group_iter_t	i;
	pg_cmt_t	*pg;
	group_t		*pgs, *cmt_pgs;
	lgrp_handle_t	lgrp_handle;
	cmt_lgrp_t	*lgrp;

	pgs = &cp->cpu_pg->pgs;
	cmt_pgs = &cp->cpu_pg->cmt_pgs;

	/*
	 * Find the lgroup that encapsulates this CPU's CMT hierarchy
	 */
	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);

	lgrp = pg_cmt_find_lgrp(lgrp_handle);
	if (lgrp == NULL) {
		/*
		 * This is a bit of a special case.
		 * The only way this can happen is if the CPU's lgrp
		 * handle changed out from underneath us, which is what
		 * happens with null_proc_lpa on starcat systems.
		 *
		 * Use the initial boot CPU lgrp, since this is what
		 * we need to tear down.
		 */
		lgrp = cpu0_lgrp;
	}

	/*
	 * First, clean up anything load balancing specific for each of
	 * the CPU's PGs that participated in CMT load balancing
	 */
	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
	while (pg != NULL) {

		/*
		 * Remove the PG from the CPU's load balancing lineage
		 */
		(void) group_remove(cmt_pgs, pg, GRP_RESIZE);

		/*
		 * If it's about to become empty, destroy its children
		 * group, and remove its reference from its siblings.
		 * This is done here (rather than below) to avoid removing
		 * our reference from a PG that we just eliminated.
		 */
		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
			if (pg->cmt_children != NULL)
				group_destroy(pg->cmt_children);
			if (pg->cmt_siblings != NULL) {
				if (pg->cmt_siblings == &lgrp->cl_pgs)
					lgrp->cl_npgs--;
				else
					pg->cmt_parent->cmt_nchildren--;
			}
		}
		pg = pg->cmt_parent;
	}

	ASSERT(GROUP_SIZE(cmt_pgs) == 0);

	/*
	 * Now that the load balancing lineage updates have happened,
	 * remove the CPU from all its PGs (destroying any that become
	 * empty).
	 */
	group_iter_init(&i);
	while ((pg = group_iterate(pgs, &i)) != NULL) {
		if (IS_CMT_PG(pg) == 0)
			continue;

		pg_cpu_delete((pg_t *)pg, cp);
		/*
		 * Deleting the CPU from the PG changes the CPU's
		 * PG group over which we are actively iterating
		 * Re-initialize the iteration
		 */
		group_iter_init(&i);

		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {

			/*
			 * The PG has become zero sized, so destroy it.
			 */
			group_destroy(&pg->cmt_cpus_actv);
			bitset_fini(&pg->cmt_cpus_actv_set);
			pghw_fini((pghw_t *)pg);

			pg_destroy((pg_t *)pg);
		}
	}
}

/*
 * Class callback when a CPU is entering a cpu partition
 */
static void
pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
{
	group_t		*pgs;
	pg_t		*pg;
	group_iter_t	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;

	/*
	 * Ensure that the new partition's PG bitset
	 * is large enough for all CMT PGs to which cp
	 * belongs
	 */
	group_iter_init(&i);
	while ((pg = group_iterate(pgs, &i)) != NULL) {
		if (IS_CMT_PG(pg) == 0)
			continue;

		if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
			bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
	}
}

/*
 * Class callback when a CPU is actually moving partitions
 */
static void
pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
{
	cpu_t		*cpp;
	group_t		*pgs;
	pg_t		*pg;
	group_iter_t	pg_iter;
	pg_cpu_itr_t	cpu_iter;
	boolean_t	found;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;
	group_iter_init(&pg_iter);

	/*
	 * Iterate over the CPU's CMT PGs
	 */
	while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {

		if (IS_CMT_PG(pg) == 0)
			continue;

		/*
		 * Add the PG to the bitset in the new partition.
		 */
		bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);

		/*
		 * Remove the PG from the bitset in the old partition
		 * if the last of the PG's CPUs has left.
		 */
		found = B_FALSE;
		PG_CPU_ITR_INIT(pg, cpu_iter);
		while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
			if (cpp == cp)
				continue;
			if (CPU_ACTIVE(cpp) &&
			    cpp->cpu_part->cp_id == oldpp->cp_id) {
				found = B_TRUE;
				break;
			}
		}
		if (!found)
			bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
	}
}
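
/*
 * For example (a hypothetical scenario): if CPUs 0 and 1 share a CHIP PG
 * and both start out in partition A, moving CPU 0 to partition B adds the
 * PG's id to B's cp_cmt_pgs bitset, but leaves it set in A's bitset since
 * CPU 1 is still an active member of the PG in A. Only when the last such
 * CPU leaves A is the PG's id deleted from A's bitset.
 */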

/*
 * Class callback when a CPU becomes active (online)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_active(cpu_t *cp)
{
	int		err;
	group_iter_t	i;
	pg_cmt_t	*pg;
	group_t		*pgs;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;
	group_iter_init(&i);

	/*
	 * Iterate over the CPU's PGs
	 */
	while ((pg = group_iterate(pgs, &i)) != NULL) {

		if (IS_CMT_PG(pg) == 0)
			continue;

		err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
		ASSERT(err == 0);

		/*
		 * If this is the first active CPU in the PG, and it
		 * represents a hardware sharing relationship over which
		 * CMT load balancing is performed, add it as a candidate
		 * for balancing with its siblings.
		 */
		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
			err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
			ASSERT(err == 0);
		}

		/*
		 * Notate the CPU in the PG's active CPU bitset.
		 * Also notate the PG as being active in its associated
		 * partition
		 */
		bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
		bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
	}
}

/*
 * Class callback when a CPU goes inactive (offline)
 *
 * This is called in a context where CPUs are paused
 */
static void
pg_cmt_cpu_inactive(cpu_t *cp)
{
	int		err;
	group_t		*pgs;
	pg_cmt_t	*pg;
	cpu_t		*cpp;
	group_iter_t	i;
	pg_cpu_itr_t	cpu_itr;
	boolean_t	found;

	ASSERT(MUTEX_HELD(&cpu_lock));

	pgs = &cp->cpu_pg->pgs;
	group_iter_init(&i);

	while ((pg = group_iterate(pgs, &i)) != NULL) {

		if (IS_CMT_PG(pg) == 0)
			continue;

		/*
		 * Remove the CPU from the CMT PG's active CPU group
		 * bitmap
		 */
		err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
		ASSERT(err == 0);

		bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);

		/*
		 * If there are no more active CPUs in this PG over which
		 * load was balanced, remove it as a balancing candidate.
		 */
		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
			err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
			ASSERT(err == 0);
		}

		/*
		 * Assert the number of active CPUs does not exceed
		 * the total number of CPUs in the PG
		 */
		ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));

		/*
		 * Update the PG bitset in the CPU's old partition
		 */
		found = B_FALSE;
		PG_CPU_ITR_INIT(pg, cpu_itr);
		while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
			if (cpp == cp)
				continue;
			if (CPU_ACTIVE(cpp) &&
			    cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
				found = B_TRUE;
				break;
			}
		}
		if (!found) {
			bitset_del(&cp->cpu_part->cp_cmt_pgs,
			    ((pg_t *)pg)->pg_id);
		}
	}
}

/*
 * Return non-zero if the CPU belongs in the given PG
 */
static int
pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
{
	cpu_t	*pg_cpu;

	pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);

	ASSERT(pg_cpu != NULL);

	/*
	 * The CPU belongs if, given the nature of the hardware sharing
	 * relationship represented by the PG, the CPU has that
	 * relationship with some other CPU already in the PG
	 */
	if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
		return (1);

	return (0);
}

/*
 * Pack the CPU's CMT hierarchy
 * The hierarchy order is preserved
 */
static void
pg_cmt_hier_pack(pg_cmt_t *hier[], int sz)
{
	int	i, j;

	for (i = 0; i < sz; i++) {
		if (hier[i] != NULL)
			continue;

		for (j = i; j < sz; j++) {
			if (hier[j] != NULL) {
				hier[i] = hier[j];
				hier[j] = NULL;
				break;
			}
		}
		if (j == sz)
			break;
	}
}
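
/*
 * For example (a hypothetical input): if the constructed lineage array is
 *
 *	{ IPIPE PG, NULL, CHIP PG, NULL }
 *
 * pg_cmt_hier_pack() shifts the non-NULL entries down to yield
 *
 *	{ IPIPE PG, CHIP PG, NULL, NULL }
 *
 * preserving the leaf-to-root ordering of the remaining levels.
 */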

/*
 * Return a cmt_lgrp_t * given an lgroup handle.
 */
static cmt_lgrp_t *
pg_cmt_find_lgrp(lgrp_handle_t hand)
{
	cmt_lgrp_t	*lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	lgrp = cmt_lgrps;
	while (lgrp != NULL) {
		if (lgrp->cl_hand == hand)
			break;
		lgrp = lgrp->cl_next;
	}
	return (lgrp);
}

/*
 * Create a cmt_lgrp_t with the specified handle.
 */
static cmt_lgrp_t *
pg_cmt_lgrp_create(lgrp_handle_t hand)
{
	cmt_lgrp_t	*lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);

	lgrp->cl_hand = hand;
	lgrp->cl_npgs = 0;
	lgrp->cl_next = cmt_lgrps;
	cmt_lgrps = lgrp;
	group_create(&lgrp->cl_pgs);

	return (lgrp);
}