/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpu_pm.h>
#include <sys/cmn_err.h>
#include <sys/sdt.h>

/*
 * Solaris Event Based CPU Power Manager
 *
 * This file implements platform independent event based CPU power management.
 * When CPUs are configured into the system, the CMT scheduling subsystem will
 * query the platform to determine if the CPU belongs to any power management
 * domains. That is, sets of CPUs that share power management states.
 *
 * Active Power Management domains represent a group of CPUs across which the
 * Operating System can request speed changes (which may in turn result
 * in voltage changes). This allows the operating system to trade off
 * performance for power savings.
 *
 * Idle Power Management domains can enter power savings states when they are
 * unutilized. These states allow the Operating System to trade off power
 * for performance (in the form of latency to transition from the idle state
 * to an active one).
 *
 * For each active and idle power domain the CMT subsystem instantiates, a
 * cpupm_domain_t structure is created. As the dispatcher schedules threads
 * to run on the system's CPUs, it will also track the utilization of the
 * enumerated power domains. Significant changes in utilization will result
 * in the dispatcher sending the power manager events that relate to the
 * utilization of the power domain. The power manager receives the events,
 * and in the context of the policy objectives in force, may decide to request
 * the domain's power/performance state be changed.
 *
 * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power
 * manager will request the CPUs in the domain run at their fastest (and most
 * power consuming) state. When the domain becomes idle (utilization at zero),
 * the power manager will request that the CPUs run at a speed that saves the
 * most power.
 *
 * The advantage of this scheme is that the CPU power manager, working with
 * the dispatcher, can be extremely responsive to changes in utilization:
 * optimizing for performance in the presence of utilization, and for power
 * savings in the presence of idleness. Such close collaboration with the
 * dispatcher has other benefits that will play out in the form of more
 * sophisticated power / performance policy in the near future.
 *
 * Avoiding state thrashing in the presence of transient periods of
 * utilization and idleness while still being responsive to non-transient
 * periods is key. The power manager implements several "governors" that are
 * used to throttle state transitions when a significant amount of transient
 * idle or transient work is detected.
 *
 * Kernel background activity (e.g. taskq threads) is by far the most common
 * form of transient utilization. Ungoverned in the face of this utilization,
 * hundreds of state transitions per second would result on an idle system.
 *
 * Transient idleness is common when a thread briefly yields the CPU to
 * wait for an event elsewhere in the system. Where the idle period is short
 * enough, the overhead associated with making the state transition doesn't
 * justify the power savings.
 */
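
/*
 * To illustrate the governors with hypothetical numbers: suppose a domain
 * repeatedly goes idle for a few hundred microseconds while the transient
 * idle prediction interval is tens of milliseconds. Each time the domain
 * becomes busy again that quickly, the preceding lowering request is
 * counted as a misprediction; after cpupm_mispredict_thresh consecutive
 * mispredictions the transient idle governor engages, and subsequent
 * lowering requests are suppressed. Once cpupm_mispredict_gov_thresh idle
 * periods are observed that exceed the prediction interval, the governor
 * is removed and normal elastic behavior resumes. The transient work
 * governor operates symmetrically on raise requests.
 */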

static cpupm_domain_t *cpupm_domains = NULL;

/*
 * The uninitialized state of CPU power management is disabled.
 */
cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED;

/*
 * Periods of utilization lasting less than this time interval are
 * characterized as transient. State changes associated with transient work
 * are considered to be mispredicted. That is, it's not worth raising and
 * lowering power states where the utilization lasts for less than this
 * interval.
 */
hrtime_t cpupm_tw_predict_interval;

/*
 * Periods of idleness lasting less than this time interval are characterized
 * as transient. State changes associated with transient idle are considered
 * to be mispredicted. That is, it's not worth lowering and raising power
 * states where the idleness lasts for less than this interval.
 */
hrtime_t cpupm_ti_predict_interval;

/*
 * Number of mispredictions after which future transitions will be governed.
 */
int cpupm_mispredict_thresh = 2;

/*
 * Likewise, the number of mispredicted governed transitions after which the
 * governor will be removed.
 */
int cpupm_mispredict_gov_thresh = 10;

/*
 * The transient work and transient idle prediction intervals are initialized
 * to be some multiple of the amount of time it takes to transition a power
 * domain from the highest to the lowest power state, and back again, which
 * is measured.
 *
 * The default values of those multiples are specified here. Tuning them
 * higher will result in the transient work and transient idle governors
 * being used more aggressively, which limits the frequency of state
 * transitions at the expense of performance and power savings, respectively.
 */
#define	CPUPM_TI_GOV_DEFAULT_MULTIPLE	600
#define	CPUPM_TW_GOV_DEFAULT_MULTIPLE	25

/*
 * Number of high=>low=>high measurements performed, of which the average
 * is taken.
 */
#define	CPUPM_BENCHMARK_ITERS	5

int cpupm_ti_gov_multiple = CPUPM_TI_GOV_DEFAULT_MULTIPLE;
int cpupm_tw_gov_multiple = CPUPM_TW_GOV_DEFAULT_MULTIPLE;
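
/*
 * For example (illustrative numbers only): if the measured high=>low=>high
 * transition latency for a domain averages 100us, then with the default
 * multiples above, cpupm_ti_predict_interval is 600 * 100us = 60ms and
 * cpupm_tw_predict_interval is 25 * 100us = 2.5ms. Idle periods shorter
 * than 60ms and busy periods shorter than 2.5ms are then treated as
 * transient.
 */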

static int	cpupm_governor_initialize(void);
static void	cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t);

cpupm_policy_t
cpupm_get_policy(void)
{
	return (cpupm_policy);
}

int
cpupm_set_policy(cpupm_policy_t new_policy)
{
	static int	gov_init = 0;
	int		result = 0;

	mutex_enter(&cpu_lock);
	if (new_policy == cpupm_policy) {
		mutex_exit(&cpu_lock);
		return (result);
	}

	/*
	 * Pausing CPUs causes a high priority thread to be scheduled
	 * on all other CPUs (besides the current one). This locks out
	 * other CPUs from making CPUPM state transitions.
	 */
	switch (new_policy) {
	case CPUPM_POLICY_DISABLED:
		pause_cpus(NULL);
		cpupm_policy = CPUPM_POLICY_DISABLED;
		start_cpus();

		result = cmt_pad_disable(PGHW_POW_ACTIVE);

		/*
		 * Once PAD has been enabled, it should always be possible
		 * to disable it.
		 */
		ASSERT(result == 0);

		/*
		 * Bring all the active power domains to the maximum
		 * performance state.
		 */
		cpupm_state_change_global(CPUPM_DTYPE_ACTIVE,
		    CPUPM_STATE_MAX_PERF);

		break;
	case CPUPM_POLICY_ELASTIC:

		result = cmt_pad_enable(PGHW_POW_ACTIVE);
		if (result < 0) {
			/*
			 * Failed to enable PAD across the active power
			 * domains, which may well be because none were
			 * enumerated.
			 */
			break;
		}

		pause_cpus(NULL);
		/*
		 * Attempt to initialize the governor parameters the first
		 * time through.
		 */
		if (gov_init == 0) {
			result = cpupm_governor_initialize();
			if (result == 0) {
				gov_init = 1;
			} else {
				/*
				 * Failed to initialize the governor
				 * parameters.
				 */
				start_cpus();
				break;
			}
		}
		cpupm_policy = CPUPM_POLICY_ELASTIC;
		start_cpus();

		break;
	default:
		cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n",
		    new_policy);
		ASSERT(0);
		break;
	}
	mutex_exit(&cpu_lock);

	return (result);
}
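
/*
 * A minimal usage sketch (hypothetical caller; the actual callers are the
 * platform and administrative power management interfaces):
 *
 *	if (cpupm_get_policy() == CPUPM_POLICY_DISABLED &&
 *	    cpupm_set_policy(CPUPM_POLICY_ELASTIC) != 0) {
 *		cmn_err(CE_NOTE, "CPUPM elastic policy unavailable");
 *	}
 *
 * A non-zero result when enabling the elastic policy typically means that
 * no active power domains were enumerated, or that the governor parameters
 * could not be initialized.
 */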

/*
 * Look for an existing power domain
 */
static cpupm_domain_t *
cpupm_domain_find(id_t id, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;

	ASSERT(MUTEX_HELD(&cpu_lock));

	dom = cpupm_domains;
	while (dom != NULL) {
		if (id == dom->cpd_id && type == dom->cpd_type)
			return (dom);
		dom = dom->cpd_next;
	}
	return (NULL);
}

/*
 * Create a new domain
 */
static cpupm_domain_t *
cpupm_domain_create(id_t id, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;

	ASSERT(MUTEX_HELD(&cpu_lock));

	dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP);
	dom->cpd_id = id;
	dom->cpd_type = type;

	/* Link into the known domain list */
	dom->cpd_next = cpupm_domains;
	cpupm_domains = dom;

	return (dom);
}

static void
cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom)
{
	/*
	 * In the event we're enumerating because the domain's state
	 * configuration has changed, toss any existing states.
	 */
	if (dom->cpd_nstates > 0) {
		kmem_free(dom->cpd_states,
		    sizeof (cpupm_state_t) * dom->cpd_nstates);
		dom->cpd_nstates = 0;
	}

	/*
	 * Query to determine the number of states, allocate storage
	 * large enough to hold the state information, and pass it back
	 * to the platform driver to complete the enumeration.
	 */
	dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL);

	if (dom->cpd_nstates == 0)
		return;

	dom->cpd_states =
	    kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP);
	(void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states);
}

/*
 * Initialize the specified type of power domain on behalf of the CPU
 */
cpupm_domain_t *
cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;
	id_t		did;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Instantiate the domain if it doesn't already exist,
	 * and enumerate its power states.
	 */
	did = cpupm_domain_id(cp, type);
	dom = cpupm_domain_find(did, type);
	if (dom == NULL) {
		dom = cpupm_domain_create(did, type);
		cpupm_domain_state_enum(cp, dom);
	}

	/*
	 * Named state initialization
	 */
	if (type == CPUPM_DTYPE_ACTIVE) {
		/*
		 * For active power domains, the highest performance
		 * state is defined as the first state returned from
		 * the domain enumeration.
		 */
		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
		    &dom->cpd_states[0];
		dom->cpd_named_states[CPUPM_STATE_LOW_POWER] =
		    &dom->cpd_states[dom->cpd_nstates - 1];

		/*
		 * Begin by assuming the CPU is running at the max perf state.
		 */
		dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
	}

	return (dom);
}

/*
 * Return the id associated with the given type of domain
 * to which cp belongs
 */
id_t
cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type)
{
	return (cpupm_plat_domain_id(cp, type));
}

/*
 * Initiate a state change for the specified domain on behalf of cp
 */
int
cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state)
{
	if (cpupm_plat_change_state(cp, state) < 0)
		return (-1);

	DTRACE_PROBE2(cpupm__change__state,
	    cpupm_domain_t *, dom,
	    cpupm_state_t *, state);

	dom->cpd_state = state;
	return (0);
}
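
/*
 * The SDT probe above can be observed with DTrace to watch state
 * transitions as they happen, e.g. (illustrative sketch; assumes the
 * cpupm_state_t cps_speed member names the target state's speed):
 *
 *	sdt:::cpupm-change-state
 *	{
 *		printf("domain %d -> speed %u\n",
 *		    ((cpupm_domain_t *)arg0)->cpd_id,
 *		    ((cpupm_state_t *)arg1)->cps_speed);
 *	}
 */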

/*
 * Interface into the CPU power manager to indicate a significant change
 * in utilization of the specified active power domain
 */
void
cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom,
    cpupm_util_event_t event)
{
	cpupm_state_t	*new_state = NULL;
	hrtime_t	last;

	if (cpupm_policy == CPUPM_POLICY_DISABLED) {
		return;
	}

	/*
	 * What follows is a simple elastic power state management policy.
	 *
	 * If the utilization has become non-zero, and the domain was
	 * previously at its lowest power state, then transition it
	 * to the highest state in the spirit of "race to idle".
	 *
	 * If the utilization has dropped to zero, then transition the
	 * domain to its lowest power state.
	 *
	 * Statistics are maintained to implement governors to reduce state
	 * transitions resulting from either transient work, or periods of
	 * transient idleness on the domain.
	 */
	switch (event) {
	case CPUPM_DOM_REMAIN_BUSY:

		/*
		 * We've received an event that the domain is running a thread
		 * that's made it to the end of its time slice. If we are at
		 * low power, then raise it. If the transient work governor
		 * is engaged, then remove it.
		 */
		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
			if (dom->cpd_tw_governed == B_TRUE) {
				dom->cpd_tw_governed = B_FALSE;
				dom->cpd_tw = 0;
			}
		}
		break;

	case CPUPM_DOM_BUSY_FROM_IDLE:
		last = dom->cpd_last_lower;
		dom->cpd_last_raise = now;

		DTRACE_PROBE3(cpupm__raise__req,
		    cpupm_domain_t *, dom,
		    hrtime_t, last,
		    hrtime_t, now);

		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {

			/*
			 * There's non-zero utilization, and the domain is
			 * running in the lower power state. Before we
			 * consider raising power, perform some bookkeeping
			 * for the transient idle governor.
			 */
			if (dom->cpd_ti_governed == B_FALSE) {
				if ((now - last) < cpupm_ti_predict_interval) {
					/*
					 * We're raising the domain power and
					 * we *just* lowered it. Consider
					 * this a mispredicted power state
					 * transition due to a transient
					 * idle period.
					 *
					 * Note: The presence of enough
					 * transient work across the domain can
					 * result in frequent transient idle
					 * periods. We don't want the ti
					 * governor being installed as a side
					 * effect of transient work, so the ti
					 * governor is left alone if the tw
					 * governor is already installed.
					 */
					if (dom->cpd_tw_governed == B_FALSE &&
					    ++dom->cpd_ti >=
					    cpupm_mispredict_thresh) {
						/*
						 * There are enough transient
						 * idle transitions to justify
						 * governing future lowering
						 * requests.
						 */
						dom->cpd_ti_governed = B_TRUE;
						dom->cpd_ti = 0;
						DTRACE_PROBE1(
						    cpupm__ti__governed,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted the last
					 * lowering.
					 */
					dom->cpd_ti = 0;
				}
			}
			if (dom->cpd_tw_governed == B_TRUE) {
				/*
				 * Raise requests are governed due to
				 * transient work.
				 */
				DTRACE_PROBE1(cpupm__raise__governed,
				    cpupm_domain_t *, dom);

				/*
				 * It's likely that we'll be governed for a
				 * while. If the transient idle governor is
				 * also in place, examine the preceding idle
				 * interval to see if that still makes sense.
				 */
				if (dom->cpd_ti_governed == B_TRUE &&
				    ((now - last) >=
				    cpupm_ti_predict_interval)) {
					if (++dom->cpd_ti >=
					    cpupm_mispredict_gov_thresh) {
						dom->cpd_ti_governed =
						    B_FALSE;
						dom->cpd_ti = 0;
					}
				}
				return;
			}
			/*
			 * Prepare to transition to the higher power state
			 */
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];

		} else if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {

			/*
			 * Utilization is non-zero, and we're already running
			 * in the higher power state. Take this opportunity to
			 * perform some bookkeeping if the last lowering
			 * request was governed.
			 */
			if (dom->cpd_ti_governed == B_TRUE) {
				if ((now - last) >=
				    cpupm_ti_predict_interval) {
					/*
					 * The domain is transient idle
					 * governed, and we mispredicted
					 * governing the last lowering request.
					 */
					if (++dom->cpd_ti >=
					    cpupm_mispredict_gov_thresh) {
						/*
						 * There are enough
						 * non-transient idle periods
						 * to justify removing the
						 * governor.
						 */
						dom->cpd_ti_governed = B_FALSE;
						dom->cpd_ti = 0;
						DTRACE_PROBE1(
						    cpupm__ti__ungoverned,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * Correctly predicted governing the
					 * last lowering request.
					 */
					dom->cpd_ti = 0;
				}
			}
		}
		break;

	case CPUPM_DOM_IDLE_FROM_BUSY:
		last = dom->cpd_last_raise;
		dom->cpd_last_lower = now;

		DTRACE_PROBE3(cpupm__lower__req,
		    cpupm_domain_t *, dom,
		    hrtime_t, last,
		    hrtime_t, now);

		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {

			/*
			 * The domain is idle, and is running in the highest
			 * performance state. Before we consider lowering
			 * power, perform some bookkeeping for the transient
			 * work governor.
			 */
			if (dom->cpd_tw_governed == B_FALSE) {
				if ((now - last) < cpupm_tw_predict_interval) {
					/*
					 * We're lowering the domain power and
					 * we *just* raised it. Consider the
					 * last raise mispredicted due to
					 * transient work.
					 */
					if (++dom->cpd_tw >=
					    cpupm_mispredict_thresh) {
						/*
						 * There are enough transient
						 * work transitions to justify
						 * governing future raise
						 * requests.
						 */
						dom->cpd_tw_governed = B_TRUE;
						dom->cpd_tw = 0;
						DTRACE_PROBE1(
						    cpupm__tw__governed,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted the last
					 * raise.
					 */
					dom->cpd_tw = 0;
				}
			}
			if (dom->cpd_ti_governed == B_TRUE) {
				/*
				 * Lowering requests are governed due to
				 * transient idleness.
				 */
				DTRACE_PROBE1(cpupm__lowering__governed,
				    cpupm_domain_t *, dom);

				/*
				 * It's likely that we'll be governed for a
				 * while. If the transient work governor is
				 * also in place, examine the preceding busy
				 * interval to see if that still makes sense.
				 */
				if (dom->cpd_tw_governed == B_TRUE &&
				    ((now - last) >=
				    cpupm_tw_predict_interval)) {
					if (++dom->cpd_tw >=
					    cpupm_mispredict_gov_thresh) {
						dom->cpd_tw_governed =
						    B_FALSE;
						dom->cpd_tw = 0;
					}
				}
				return;
			}

			/*
			 * Prepare to transition to a lower power state.
			 */
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_LOW_POWER];

		} else if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {

			/*
			 * The domain is idle, and we're already running in
			 * the lower power state. Take this opportunity to
			 * perform some bookkeeping if the last raising
			 * request was governed.
			 */
			if (dom->cpd_tw_governed == B_TRUE) {
				if ((now - last) >=
				    cpupm_tw_predict_interval) {
					/*
					 * The domain is transient work
					 * governed, and we mispredicted
					 * governing the last raising request.
					 */
					if (++dom->cpd_tw >=
					    cpupm_mispredict_gov_thresh) {
						/*
						 * There's enough non-transient
						 * work to justify removing
						 * the governor.
						 */
						dom->cpd_tw_governed = B_FALSE;
						dom->cpd_tw = 0;
						DTRACE_PROBE1(
						    cpupm__tw__ungoverned,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted governing
					 * the last raise.
					 */
					dom->cpd_tw = 0;
				}
			}
		}
		break;
	}
	/*
	 * Change the power state.
	 * Not much is currently done if this doesn't succeed.
	 */
	if (new_state)
		(void) cpupm_change_state(cp, dom, new_state);
}
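
/*
 * Bookkeeping summary for the event handler above: cpd_ti and cpd_tw count
 * mispredictions for the transient idle and transient work governors,
 * respectively. While a governor is disengaged, a raise (lower) request
 * arriving within the prediction interval of the previous lower (raise)
 * counts as a misprediction; cpupm_mispredict_thresh of these engage the
 * governor. While a governor is engaged, each observed interval that
 * exceeds the prediction interval counts against it, and
 * cpupm_mispredict_gov_thresh of those disengage the governor.
 */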

/*
 * Interface called by platforms to dynamically change the
 * MAX performance cpupm state
 */
void
cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
{
	cpupm_domain_t	*dom;
	id_t		did;
	cpupm_dtype_t	type = CPUPM_DTYPE_ACTIVE;
	boolean_t	change_state = B_FALSE;
	cpupm_state_t	*new_state = NULL;

	did = cpupm_domain_id(cp, type);
	mutex_enter(&cpu_lock);
	dom = cpupm_domain_find(did, type);
	mutex_exit(&cpu_lock);

	/*
	 * A lock could be used here to avoid changing the power state of the
	 * CPU while CPUPM_STATE_MAX_PERF is being updated. Since the
	 * occurrence of events that change MAX_PERF is infrequent, it may
	 * not be a good idea to take on the additional locking overhead. In
	 * the worst case, for one cycle the power may not get changed to the
	 * required level.
	 */
	if (dom != NULL) {
		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
			change_state = B_TRUE;
		}

		/*
		 * If an out of range level is passed, use the lowest
		 * supported speed.
		 */
		if (max_perf_level >= dom->cpd_nstates &&
		    dom->cpd_nstates > 1) {
			max_perf_level = dom->cpd_nstates - 1;
		}

		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
		    &dom->cpd_states[max_perf_level];

		/*
		 * If the current state is MAX_PERF, change the current state
		 * to the new MAX_PERF.
		 */
		if (change_state) {
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
			if (new_state) {
				(void) cpupm_change_state(cp, dom, new_state);
			}
		}
	}
}

/*
 * Benchmark some power state transitions and use the transition latencies as
 * a basis for initializing parameters for the transient idle and transient
 * work governors.
 *
 * Returns 0 on success, or -1 if the governor parameters could not be
 * initialized.
 */
static int
cpupm_governor_initialize(void)
{
	cpu_t		*cp = CPU;
	cpupm_domain_t	*dom;
	cpupm_state_t	*low, *high;
	id_t		did;
	hrtime_t	start, delta, deltas = 0;
	int		iterations;

	did = cpupm_domain_id(cp, CPUPM_DTYPE_ACTIVE);
	if (did == CPUPM_NO_DOMAIN)
		return (-1);

	dom = cpupm_domain_find(did, CPUPM_DTYPE_ACTIVE);
	if (dom == NULL)
		return (-1);

	low = dom->cpd_named_states[CPUPM_STATE_LOW_POWER];
	high = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];

	for (iterations = 0; iterations < CPUPM_BENCHMARK_ITERS;
	    iterations++) {

		/*
		 * Measure the amount of time it takes to transition the
		 * domain down to the lowest, and back to the highest power
		 * state.
		 */
		start = gethrtime_unscaled();
		(void) cpupm_change_state(cp, dom, low);
		(void) cpupm_change_state(cp, dom, high);
		delta = gethrtime_unscaled() - start;

		DTRACE_PROBE1(cpupm__benchmark__latency,
		    hrtime_t, delta);

		deltas += delta;
	}

	/*
	 * Figure the average latency, and tune the transient work and
	 * transient idle prediction intervals accordingly.
	 */
	delta = deltas / iterations;

	cpupm_ti_predict_interval = delta * cpupm_ti_gov_multiple;
	cpupm_tw_predict_interval = delta * cpupm_tw_gov_multiple;

	return (0);
}
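
/*
 * Note that the benchmark above uses gethrtime_unscaled(), so the derived
 * prediction intervals are in unscaled time units. This is presumed to
 * match the timestamps the dispatcher passes to cpupm_utilization_event(),
 * allowing the interval comparisons to be made without scaling.
 */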

/*
 * Initiate a state change in all CPUPM domain instances of the specified type
 */
static void
cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state)
{
	cpu_t		*cp;
	pg_cmt_t	*pwr_pg;
	cpupm_domain_t	*dom;
	group_t		*hwset;
	group_iter_t	giter;
	pg_cpu_itr_t	cpu_iter;
	pghw_type_t	hw;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (type) {
	case CPUPM_DTYPE_ACTIVE:
		hw = PGHW_POW_ACTIVE;
		break;
	default:
		/*
		 * Power domain types other than "active" are unsupported.
		 */
		ASSERT(type == CPUPM_DTYPE_ACTIVE);
		return;
	}

	if ((hwset = pghw_set_lookup(hw)) == NULL)
		return;

	/*
	 * Iterate over the power domains
	 */
	group_iter_init(&giter);
	while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) {

		dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle;

		/*
		 * Iterate over the CPUs in each domain
		 */
		PG_CPU_ITR_INIT(pwr_pg, cpu_iter);
		while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
			(void) cpupm_change_state(cp, dom,
			    dom->cpd_named_states[state]);
		}
	}
}