/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/processor.h>
#include <sys/disp.h>
#include <sys/group.h>
#include <sys/pg.h>

/*
 * Processor groups
 *
 * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
 * it is no longer necessarily true that a given physical processor module
 * will present itself as a single schedulable entity (cpu_t). Rather, each
 * chip and/or processor core may present itself as one or more "logical"
 * CPUs.
 *
 * The logical CPUs presented may share physical components such as caches,
 * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
 * kernel be aware of the relationships existing between logical CPUs so that
 * the appropriate optimizations may be employed.
 *
 * The processor group abstraction represents a set of logical CPUs that
 * generally share some sort of physical or characteristic relationship.
 *
 * In the case of a physical sharing relationship, the CPUs in the group may
 * share a pipeline, cache, or floating point unit. In the case of a logical
 * relationship, a PG may represent the set of CPUs in a processor set, or the
 * set of CPUs running at a particular clock speed.
 *
 * The generic processor group structure, pg_t, contains the elements generic
 * to a group of CPUs. Depending on the nature of the CPU relationship
 * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
 * PG where more specific data is represented.
 *
 * As an example, a PG representing a PHYSICAL relationship may be recast to
 * a pghw_t, where data further describing the hardware sharing relationship
 * is maintained. See pghw.c and pghw.h for details on physical PGs.
 *
 * At this time a more specialized casting of a PG representing a LOGICAL
 * relationship has not been implemented, but the architecture allows for this
 * in the future.
 *
 * Processor Group Classes
 *
 * Processor group consumers may wish to maintain and associate specific
 * data with the PGs they create. For this reason, a mechanism for creating
 * class specific PGs exists. Classes may overload the default functions for
 * creating, destroying, and associating CPUs with PGs, and may also register
 * class specific callbacks to be invoked when the CPU related system
 * configuration changes. Class specific data is stored/associated with
 * PGs by incorporating the pg_t (or pghw_t, as appropriate) as the first
 * element of a class specific PG object. In memory, such a structure may look
 * like:
 *
 *	----------------------- - - -
 *	| common              | | | | <--(pg_t *)
 *	----------------------- | | -
 *	| HW specific         | | | <-----(pghw_t *)
 *	----------------------- | -
 *	| class specific      | | <-------(pg_cmt_t *)
 *	----------------------- -
 *
 * Access to the PG class specific data can be had by casting a pointer to
 * its class specific view.
 */
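/*
 * As a hypothetical sketch of the embedding described above (the "foo"
 * names are illustrative only; see the CMT class's pg_cmt_t for a real
 * instance of this layout), a class might define and access its PG type
 * as follows:
 *
 *	typedef struct pg_foo {
 *		pghw_t	foo_hw;		<- common pg_t + HW data; must be
 *					   the first element
 *		uint_t	foo_private;	<- class specific data
 *	} pg_foo_t;
 *
 *	pg_foo_t	*foo = (pg_foo_t *)pg;	<- class specific view
 *	pghw_t		*hw = (pghw_t *)pg;	<- hardware view
 */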
static pg_t		*pg_alloc_default(pg_class_t);
static void		pg_free_default(pg_t *);
static void		pg_null_op();

/*
 * Bootstrap CPU specific PG data
 * See pg_cpu_bootstrap()
 */
static cpu_pg_t		bootstrap_pg_data;

/*
 * Bitset of allocated PG ids (they are sequential)
 * and the next free id in the set.
 */
static bitset_t		pg_id_set;
static pgid_t		pg_id_next = 0;

/*
 * Default and externed PG ops vectors
 */
static struct pg_ops pg_ops_default = {
	pg_alloc_default,	/* alloc */
	pg_free_default,	/* free */
	NULL,			/* cpu_init */
	NULL,			/* cpu_fini */
	NULL,			/* cpu_active */
	NULL,			/* cpu_inactive */
	NULL,			/* cpupart_in */
	NULL,			/* cpupart_out */
	NULL,			/* cpupart_move */
	NULL,			/* cpu_belongs */
	NULL,			/* policy_name */
};

static struct pg_cb_ops pg_cb_ops_default = {
	pg_null_op,		/* thread_swtch */
	pg_null_op,		/* thread_remain */
};

/*
 * Class specific PG allocation callbacks
 */
#define	PG_ALLOC(class)						\
	(pg_classes[class].pgc_ops->alloc ?			\
	    pg_classes[class].pgc_ops->alloc() :		\
	    pg_classes[pg_default_cid].pgc_ops->alloc())

#define	PG_FREE(pg)						\
	((pg)->pg_class->pgc_ops->free ?			\
	    (pg)->pg_class->pgc_ops->free(pg) :			\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))

/*
 * Class specific PG policy name
 */
#define	PG_POLICY_NAME(pg)					\
	((pg)->pg_class->pgc_ops->policy_name ?			\
	    (pg)->pg_class->pgc_ops->policy_name(pg) : NULL)
/*
 * Class specific membership test callback
 */
#define	PG_CPU_BELONGS(pg, cp)					\
	((pg)->pg_class->pgc_ops->cpu_belongs ?			\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)

/*
 * CPU configuration callbacks
 */
#define	PG_CPU_INIT(class, cp)					\
{								\
	if (pg_classes[class].pgc_ops->cpu_init)		\
		pg_classes[class].pgc_ops->cpu_init(cp);	\
}

#define	PG_CPU_FINI(class, cp)					\
{								\
	if (pg_classes[class].pgc_ops->cpu_fini)		\
		pg_classes[class].pgc_ops->cpu_fini(cp);	\
}

#define	PG_CPU_ACTIVE(class, cp)				\
{								\
	if (pg_classes[class].pgc_ops->cpu_active)		\
		pg_classes[class].pgc_ops->cpu_active(cp);	\
}

#define	PG_CPU_INACTIVE(class, cp)				\
{								\
	if (pg_classes[class].pgc_ops->cpu_inactive)		\
		pg_classes[class].pgc_ops->cpu_inactive(cp);	\
}

/*
 * CPU / cpupart configuration callbacks
 */
#define	PG_CPUPART_IN(class, cp, pp)				\
{								\
	if (pg_classes[class].pgc_ops->cpupart_in)		\
		pg_classes[class].pgc_ops->cpupart_in(cp, pp);	\
}

#define	PG_CPUPART_OUT(class, cp, pp)				\
{								\
	if (pg_classes[class].pgc_ops->cpupart_out)		\
		pg_classes[class].pgc_ops->cpupart_out(cp, pp);	\
}

#define	PG_CPUPART_MOVE(class, cp, old, new)			\
{								\
	if (pg_classes[class].pgc_ops->cpupart_move)		\
		pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \
}

static pg_class_t	*pg_classes;
static int		pg_nclasses;

static pg_cid_t		pg_default_cid;

/*
 * Initialize common PG subsystem.
 */
void
pg_init(void)
{
	extern void pg_cmt_class_init();

	pg_default_cid =
	    pg_class_register("default", &pg_ops_default, PGR_LOGICAL);

	/*
	 * Initialize classes to allow them to register with the framework
	 */
	pg_cmt_class_init();

	pg_cpu0_init();
}

/*
 * Perform CPU 0 initialization
 */
void
pg_cpu0_init(void)
{
	extern void pghw_physid_create();

	/*
	 * Create the physical ID cache for the boot CPU
	 */
	pghw_physid_create(CPU);

	/*
	 * pg_cpu_* require that cpu_lock be held
	 */
	mutex_enter(&cpu_lock);

	pg_cpu_init(CPU);
	pg_cpupart_in(CPU, &cp_default);
	pg_cpu_active(CPU);

	mutex_exit(&cpu_lock);
}

/*
 * Invoked when the topology for CPU0 changes
 * post pg_cpu0_init().
 *
 * Currently happens as a result of null_proc_lpa
 * on Starcat.
 */
void
pg_cpu0_reinit(void)
{
	mutex_enter(&cpu_lock);
	pg_cpu_inactive(CPU);
	pg_cpupart_out(CPU, &cp_default);
	pg_cpu_fini(CPU);

	pg_cpu_init(CPU);
	pg_cpupart_in(CPU, &cp_default);
	pg_cpu_active(CPU);
	mutex_exit(&cpu_lock);
}

/*
 * Register a new PG class
 */
pg_cid_t
pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
{
	pg_class_t	*newclass;
	pg_class_t	*classes_old;
	id_t		cid;

	mutex_enter(&cpu_lock);

	/*
	 * Allocate a new pg_class_t in the pg_classes array
	 */
	if (pg_nclasses == 0) {
		pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
	} else {
		classes_old = pg_classes;
		pg_classes =
		    kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
		    KM_SLEEP);
		(void) kcopy(classes_old, pg_classes,
		    sizeof (pg_class_t) * pg_nclasses);
		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
	}

	cid = pg_nclasses++;
	newclass = &pg_classes[cid];

	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
	newclass->pgc_id = cid;
	newclass->pgc_ops = ops;
	newclass->pgc_relation = relation;

	mutex_exit(&cpu_lock);

	return (cid);
}
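/*
 * A class registers itself once, at initialization time, with an ops
 * vector containing only the callbacks it needs; a NULL alloc/free falls
 * back to the default class (see PG_ALLOC()/PG_FREE() above), while NULL
 * configuration callbacks are simply skipped. A minimal, hypothetical
 * sketch (the "foo" names are illustrative only; pg_cmt_class_init()
 * performs the real registration for the CMT class):
 *
 *	static struct pg_ops pg_ops_foo = {
 *		pg_foo_alloc,		(alloc)
 *		pg_foo_free,		(free)
 *		pg_foo_cpu_init,	(cpu_init)
 *		NULL,			(remaining callbacks unused)
 *		...
 *	};
 *
 *	static pg_cid_t pg_foo_cid;
 *
 *	void
 *	pg_foo_class_init(void)
 *	{
 *		pg_foo_cid = pg_class_register("foo", &pg_ops_foo,
 *		    PGR_PHYSICAL);
 *	}
 */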
/*
 * Try to find an existing pg in set in which to place cp.
 * Returns the pg if found, and NULL otherwise.
 * In the event that the CPU could belong to multiple
 * PGs in the set, the first matching PG will be returned.
 */
pg_t *
pg_cpu_find_pg(cpu_t *cp, group_t *set)
{
	pg_t		*pg;
	group_iter_t	i;

	group_iter_init(&i);
	while ((pg = group_iterate(set, &i)) != NULL) {
		/*
		 * Ask the class if the CPU belongs here
		 */
		if (PG_CPU_BELONGS(pg, cp))
			return (pg);
	}
	return (NULL);
}

/*
 * Iterate over the CPUs in a PG after initializing
 * the iterator with PG_CPU_ITR_INIT()
 */
cpu_t *
pg_cpu_next(pg_cpu_itr_t *itr)
{
	cpu_t	*cpu;
	pg_t	*pg = itr->pg;

	cpu = group_iterate(&pg->pg_cpus, &itr->position);
	return (cpu);
}
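/*
 * Typical usage of the iterator, as a sketch (PG_CPU_ITR_INIT() is
 * defined alongside pg_cpu_itr_t in pg.h); pg_cpu_next() returns NULL
 * once all CPUs in the PG have been visited:
 *
 *	pg_cpu_itr_t	itr;
 *	cpu_t		*cp;
 *
 *	PG_CPU_ITR_INIT(pg, itr);
 *	while ((cp = pg_cpu_next(&itr)) != NULL) {
 *		...operate on cp...
 *	}
 */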
/*
 * Test if a given PG contains a given CPU
 */
boolean_t
pg_cpu_find(pg_t *pg, cpu_t *cp)
{
	if (group_find(&pg->pg_cpus, cp) == (uint_t)-1)
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Set the PG's callbacks to the default
 */
void
pg_callback_set_defaults(pg_t *pg)
{
	bcopy(&pg_cb_ops_default, &pg->pg_cb, sizeof (struct pg_cb_ops));
}

/*
 * Create a PG of a given class.
 * This routine may block.
 */
pg_t *
pg_create(pg_cid_t cid)
{
	pg_t	*pg;
	pgid_t	id;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Call the class specific PG allocation routine
	 */
	pg = PG_ALLOC(cid);
	pg->pg_class = &pg_classes[cid];
	pg->pg_relation = pg->pg_class->pgc_relation;

	/*
	 * Find the next free sequential pg id
	 */
	do {
		if (pg_id_next >= bitset_capacity(&pg_id_set))
			bitset_resize(&pg_id_set, pg_id_next + 1);
		id = pg_id_next++;
	} while (bitset_in_set(&pg_id_set, id));

	pg->pg_id = id;
	bitset_add(&pg_id_set, pg->pg_id);

	/*
	 * Create the PG's CPU group
	 */
	group_create(&pg->pg_cpus);

	/*
	 * Initialize the events ops vector
	 */
	pg_callback_set_defaults(pg);

	return (pg);
}

/*
 * Destroy a PG.
 * This routine may block.
 */
void
pg_destroy(pg_t *pg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	group_destroy(&pg->pg_cpus);

	/*
	 * Unassign the pg_id
	 */
	if (pg_id_next > pg->pg_id)
		pg_id_next = pg->pg_id;
	bitset_del(&pg_id_set, pg->pg_id);

	/*
	 * Invoke the class specific de-allocation routine
	 */
	PG_FREE(pg);
}

/*
 * Add the CPU "cp" to processor group "pg".
 * This routine may block.
 */
void
pg_cpu_add(pg_t *pg, cpu_t *cp)
{
	int	err;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* This adds the CPU to the PG's CPU group */
	err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
	ASSERT(err == 0);

	/* This adds the PG to the CPU's PG group */
	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
	err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
	ASSERT(err == 0);
}

/*
 * Remove "cp" from "pg".
 * This routine may block.
 */
void
pg_cpu_delete(pg_t *pg, cpu_t *cp)
{
	int	err;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Remove the CPU from the PG */
	err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
	ASSERT(err == 0);

	/* Remove the PG from the CPU's PG group */
	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
	err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
	ASSERT(err == 0);
}
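/*
 * Taken together, pg_cpu_find_pg(), pg_create() and pg_cpu_add() give a
 * class's cpu_init callback a simple recipe for placing a new CPU: look
 * for an existing PG the CPU belongs in, create one if none is found,
 * and add the CPU to it. A loose, hypothetical sketch (the "foo" names
 * are illustrative; error handling and class bookkeeping are omitted):
 *
 *	static void
 *	pg_foo_cpu_init(cpu_t *cp)
 *	{
 *		pg_t	*pg;
 *
 *		pg = pg_cpu_find_pg(cp, &pg_foo_pgs);
 *		if (pg == NULL) {
 *			pg = pg_create(pg_foo_cid);
 *			(void) group_add(&pg_foo_pgs, pg, GRP_RESIZE);
 *		}
 *		pg_cpu_add(pg, cp);
 *	}
 *
 * where pg_foo_pgs would be a group_t of the class's existing PGs.
 */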
/*
 * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
 */
static cpu_pg_t *
pg_cpu_data_alloc(void)
{
	cpu_pg_t	*pgd;

	pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
	group_create(&pgd->pgs);
	group_create(&pgd->cmt_pgs);

	return (pgd);
}

/*
 * Free the CPU's PG data.
 */
static void
pg_cpu_data_free(cpu_pg_t *pgd)
{
	group_destroy(&pgd->pgs);
	group_destroy(&pgd->cmt_pgs);
	kmem_free(pgd, sizeof (cpu_pg_t));
}

/*
 * A new CPU is coming into the system, either via booting or DR.
 * Allocate its PG data, and notify all registered classes about
 * the new CPU.
 *
 * This routine may block.
 */
void
pg_cpu_init(cpu_t *cp)
{
	pg_cid_t	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Allocate and size the per CPU pg data
	 */
	cp->cpu_pg = pg_cpu_data_alloc();

	/*
	 * Notify all registered classes about the new CPU
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPU_INIT(i, cp);
}

/*
 * This CPU is being deleted from the system. Notify the classes
 * and free up the CPU's PG data.
 */
void
pg_cpu_fini(cpu_t *cp)
{
	pg_cid_t	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * This can happen if the CPU coming into the system
	 * failed to power on.
	 */
	if (cp->cpu_pg == NULL ||
	    cp->cpu_pg == &bootstrap_pg_data)
		return;

	for (i = 0; i < pg_nclasses; i++)
		PG_CPU_FINI(i, cp);

	pg_cpu_data_free(cp->cpu_pg);
	cp->cpu_pg = NULL;
}

/*
 * This CPU is becoming active (online).
 * This routine may not block, as it is called from paused
 * CPUs context.
 */
void
pg_cpu_active(cpu_t *cp)
{
	pg_cid_t	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Notify all registered classes that the CPU is becoming active
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPU_ACTIVE(i, cp);
}

/*
 * This CPU is going inactive (offline).
 * This routine may not block, as it is called from paused
 * CPUs context.
 */
void
pg_cpu_inactive(cpu_t *cp)
{
	pg_cid_t	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Notify all registered classes that the CPU is going inactive
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPU_INACTIVE(i, cp);
}

/*
 * Invoked when the CPU is about to move into the partition.
 * This routine may block.
 */
void
pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
{
	int	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Notify all registered classes that the
	 * CPU is about to enter the CPU partition
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPUPART_IN(i, cp, pp);
}

/*
 * Invoked when the CPU is about to move out of the partition.
 * This routine may block.
 */
/*ARGSUSED*/
void
pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
{
	int	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Notify all registered classes that the
	 * CPU is about to leave the CPU partition
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPUPART_OUT(i, cp, pp);
}

/*
 * Invoked when the CPU is *moving* between partitions.
 *
 * This routine may not block, as it is called from paused CPUs
 * context.
 */
void
pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
{
	int	i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Notify all registered classes that the
	 * CPU is moving between CPU partitions
	 */
	for (i = 0; i < pg_nclasses; i++)
		PG_CPUPART_MOVE(i, cp, oldpp, newpp);
}

/*
 * Return a class specific string describing a policy implemented
 * across this PG
 */
char *
pg_policy_name(pg_t *pg)
{
	char *str;
	if ((str = PG_POLICY_NAME(pg)) != NULL)
		return (str);

	return ("N/A");
}

/*
 * Provide the specified CPU a bootstrap pg.
 * This is needed to allow sane behaviour if any PG consuming
 * code needs to deal with a partially initialized CPU.
 */
void
pg_cpu_bootstrap(cpu_t *cp)
{
	cp->cpu_pg = &bootstrap_pg_data;
}

/*ARGSUSED*/
static pg_t *
pg_alloc_default(pg_class_t class)
{
	return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
}

/*ARGSUSED*/
static void
pg_free_default(struct pg *pg)
{
	kmem_free(pg, sizeof (pg_t));
}

static void
pg_null_op()
{
}

/*
 * Invoke the "thread switch" callback for each of the CPU's PGs.
 * This is invoked from the dispatcher swtch() routine, which is called
 * when a thread running on a CPU should switch to another thread.
 * "cp" is the CPU on which the thread switch is happening
 * "now" is an unscaled hrtime_t timestamp taken in swtch()
 * "old" and "new" are the outgoing and incoming threads, respectively.
 */
void
pg_ev_thread_swtch(struct cpu *cp, hrtime_t now, kthread_t *old, kthread_t *new)
{
	int	i, sz;
	group_t	*grp;
	pg_t	*pg;

	grp = &cp->cpu_pg->pgs;
	sz = GROUP_SIZE(grp);
	for (i = 0; i < sz; i++) {
		pg = GROUP_ACCESS(grp, i);
		pg->pg_cb.thread_swtch(pg, cp, now, old, new);
	}
}
/*
 * Invoke the "thread remain" callback for each of the CPU's PGs.
 * This is called from the dispatcher's swtch() routine when a thread
 * running on the CPU "cp" is switching to itself, which can happen as an
 * artifact of the thread's timeslice expiring.
 */
void
pg_ev_thread_remain(struct cpu *cp, kthread_t *t)
{
	int	i, sz;
	group_t	*grp;
	pg_t	*pg;

	grp = &cp->cpu_pg->pgs;
	sz = GROUP_SIZE(grp);
	for (i = 0; i < sz; i++) {
		pg = GROUP_ACCESS(grp, i);
		pg->pg_cb.thread_remain(pg, cp, t);
	}
}
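/*
 * The default callbacks installed by pg_callback_set_defaults() are
 * no-ops; a class interested in these dispatcher events overrides them
 * after creating a PG. A hedged sketch (the pg_foo_* names are
 * illustrative only):
 *
 *	pg->pg_cb.thread_swtch = pg_foo_thread_swtch;
 *	pg->pg_cb.thread_remain = pg_foo_thread_remain;
 *
 * where the callbacks match the signatures invoked above:
 *
 *	static void
 *	pg_foo_thread_swtch(pg_t *pg, cpu_t *cp, hrtime_t now,
 *	    kthread_t *old, kthread_t *new);
 *
 *	static void
 *	pg_foo_thread_remain(pg_t *pg, cpu_t *cp, kthread_t *t);
 */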