/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpu_pm.h>
#include <sys/cmn_err.h>
#include <sys/sdt.h>

/*
 * Solaris Event Based CPU Power Manager
 *
 * This file implements platform independent event based CPU power management.
 * When CPUs are configured into the system, the CMT scheduling subsystem will
 * query the platform to determine if the CPU belongs to any power management
 * domains. That is, sets of CPUs that share power management states.
 *
 * Active Power Management domains represent a group of CPUs across which the
 * Operating System can request speed changes (which may in turn result
 * in voltage changes). This allows the operating system to trade off
 * performance for power savings.
 *
 * Idle Power Management domains can enter power savings states when they are
 * unutilized. These states allow the Operating System to trade off power
 * for performance (in the form of latency to transition from the idle state
 * to an active one).
 *
 * For each active and idle power domain the CMT subsystem instantiates, a
 * cpupm_domain_t structure is created. As the dispatcher schedules threads
 * to run on the system's CPUs, it will also track the utilization of the
 * enumerated power domains. Significant changes in utilization will result
 * in the dispatcher sending the power manager events that relate to the
 * utilization of the power domain. The power manager receives the events,
 * and in the context of the policy objectives in force, may decide to
 * request that the domain's power/performance state be changed.
 *
 * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power
 * manager will request the CPUs in the domain run at their fastest (and most
 * power consuming) state. When the domain becomes idle (utilization at zero),
 * the power manager will request that the CPUs run at a speed that saves the
 * most power.
 *
 * The advantage of this scheme is that the CPU power manager, working with
 * the dispatcher, can be extremely responsive to changes in utilization:
 * optimizing for performance in the presence of utilization, and for power
 * savings in the presence of idleness. Such close collaboration with the
 * dispatcher has other benefits that will play out in the form of more
 * sophisticated power / performance policy in the near future.
 *
 * Avoiding state thrashing in the presence of transient periods of
 * utilization and idleness while still being responsive to non-transient
 * periods is key. The power manager implements several "governors" that are
 * used to throttle state transitions when a significant amount of transient
 * idle or transient work is detected.
 *
 * Kernel background activity (e.g. taskq threads) is by far the most common
 * form of transient utilization. Ungoverned in the face of this utilization,
 * hundreds of state transitions per second would result on an idle system.
 *
 * Transient idleness is common when a thread briefly yields the CPU to
 * wait for an event elsewhere in the system. Where the idle period is short
 * enough, the overhead associated with making the state transition doesn't
 * justify the power savings.
 */
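
/*
 * Illustrative sketch (added commentary, not an interface contract): the
 * dispatcher drives this module by delivering utilization events for a
 * domain, for example:
 *
 *	cpupm_utilization_event(cp, gethrtime_unscaled(), dom,
 *	    CPUPM_DOM_BUSY_FROM_IDLE);
 *
 * The elastic policy implemented below then decides whether a power state
 * change is warranted, subject to the transient work and transient idle
 * governors. The timestamp is assumed to be in the same unscaled time units
 * used by the governor benchmarking code below.
 */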

static cpupm_domain_t *cpupm_domains = NULL;

/*
 * CPU power management defaults to disabled until a policy is set.
 */
cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED;

/*
 * Periods of utilization lasting less than this time interval are
 * characterized as transient. State changes associated with transient work
 * are considered to be mispredicted. That is, it's not worth raising and
 * lowering power states where the utilization lasts for less than this
 * interval.
 */
hrtime_t cpupm_tw_predict_interval;

/*
 * Periods of idleness lasting less than this time interval are characterized
 * as transient. State changes associated with transient idle are considered
 * to be mispredicted. That is, it's not worth lowering and raising power
 * states where the idleness lasts for less than this interval.
 */
hrtime_t cpupm_ti_predict_interval;

/*
 * Number of mispredictions after which future transitions will be governed.
 */
int cpupm_mispredict_thresh = 2;

/*
 * Likewise, the number of mispredicted governed transitions after which the
 * governor will be removed.
 */
int cpupm_mispredict_gov_thresh = 10;

/*
 * The transient work and transient idle prediction intervals are initialized
 * to be some multiple of the amount of time it takes to transition a power
 * domain from the highest to the lowest power state, and back again, which
 * is measured.
 *
 * The default values of those multiples are specified here. Tuning them
 * higher will result in the transient work and transient idle governors
 * being used more aggressively, which limits the frequency of state
 * transitions at the expense of performance and power savings, respectively.
 */
#define	CPUPM_TI_GOV_DEFAULT_MULTIPLE	600
#define	CPUPM_TW_GOV_DEFAULT_MULTIPLE	25

/*
 * Number of high=>low=>high measurements performed, of which the average
 * is taken.
 */
#define	CPUPM_BENCHMARK_ITERS	5

int cpupm_ti_gov_multiple = CPUPM_TI_GOV_DEFAULT_MULTIPLE;
int cpupm_tw_gov_multiple = CPUPM_TW_GOV_DEFAULT_MULTIPLE;
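
/*
 * Worked example (illustrative numbers only): if cpupm_governor_initialize()
 * measures an average high=>low=>high transition latency T, then with the
 * default multiples above:
 *
 *	cpupm_ti_predict_interval = T * 600
 *	cpupm_tw_predict_interval = T * 25
 *
 * With T of roughly 10us, idle periods shorter than ~6ms and busy periods
 * shorter than ~250us would be treated as transient by the respective
 * governors.
 */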

static int	cpupm_governor_initialize(void);
static void	cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t);

cpupm_policy_t
cpupm_get_policy(void)
{
	return (cpupm_policy);
}

int
cpupm_set_policy(cpupm_policy_t new_policy)
{
	static int	gov_init = 0;
	int		result = 0;

	mutex_enter(&cpu_lock);
	if (new_policy == cpupm_policy) {
		mutex_exit(&cpu_lock);
		return (result);
	}

	/*
	 * Pausing CPUs causes a high priority thread to be scheduled
	 * on all other CPUs (besides the current one). This locks out
	 * other CPUs from making CPUPM state transitions.
	 */
	switch (new_policy) {
	case CPUPM_POLICY_DISABLED:
		pause_cpus(NULL);
		cpupm_policy = CPUPM_POLICY_DISABLED;
		start_cpus();

		result = cmt_pad_disable(PGHW_POW_ACTIVE);

		/*
		 * Once PAD has been enabled, it should always be possible
		 * to disable it.
		 */
		ASSERT(result == 0);

		/*
		 * Bring all the active power domains to the maximum
		 * performance state.
		 */
		cpupm_state_change_global(CPUPM_DTYPE_ACTIVE,
		    CPUPM_STATE_MAX_PERF);

		break;
	case CPUPM_POLICY_ELASTIC:

		result = cmt_pad_enable(PGHW_POW_ACTIVE);
		if (result < 0) {
			/*
			 * Failed to enable PAD across the active power
			 * domains, which may well be because none were
			 * enumerated.
			 */
			break;
		}

		pause_cpus(NULL);
		/*
		 * Attempt to initialize the governor parameters the first
		 * time through.
		 */
		if (gov_init == 0) {
			result = cpupm_governor_initialize();
			if (result == 0) {
				gov_init = 1;
			} else {
				/*
				 * Failed to initialize the governor
				 * parameters.
				 */
				start_cpus();
				break;
			}
		}
		cpupm_policy = CPUPM_POLICY_ELASTIC;
		start_cpus();

		break;
	default:
		cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n",
		    new_policy);
		ASSERT(0);
		break;
	}
	mutex_exit(&cpu_lock);

	return (result);
}
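
/*
 * Usage sketch (hypothetical caller, for illustration): a platform driver
 * enabling the elastic policy might do:
 *
 *	if (cpupm_set_policy(CPUPM_POLICY_ELASTIC) < 0)
 *		cmn_err(CE_NOTE, "CPUPM elastic policy not enabled");
 *
 * A negative return (from enabling PAD, or from initializing the governor
 * parameters) means the policy did not take effect.
 */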

/*
 * Look for an existing power domain
 */
static cpupm_domain_t *
cpupm_domain_find(id_t id, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;

	ASSERT(MUTEX_HELD(&cpu_lock));

	dom = cpupm_domains;
	while (dom != NULL) {
		if (id == dom->cpd_id && type == dom->cpd_type)
			return (dom);
		dom = dom->cpd_next;
	}
	return (NULL);
}

/*
 * Create a new domain
 */
static cpupm_domain_t *
cpupm_domain_create(id_t id, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;

	ASSERT(MUTEX_HELD(&cpu_lock));

	dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP);
	dom->cpd_id = id;
	dom->cpd_type = type;

	/* Link into the known domain list */
	dom->cpd_next = cpupm_domains;
	cpupm_domains = dom;

	return (dom);
}

static void
cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom)
{
	/*
	 * In the event we're enumerating because the domain's state
	 * configuration has changed, toss any existing states.
	 */
	if (dom->cpd_nstates > 0) {
		kmem_free(dom->cpd_states,
		    sizeof (cpupm_state_t) * dom->cpd_nstates);
		dom->cpd_nstates = 0;
	}

	/*
	 * Query to determine the number of states, allocate storage
	 * large enough to hold the state information, and pass it back
	 * to the platform driver to complete the enumeration.
	 */
	dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL);

	if (dom->cpd_nstates == 0)
		return;

	dom->cpd_states =
	    kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP);
	(void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states);
}

/*
 * Initialize the specified type of power domain on behalf of the CPU
 */
cpupm_domain_t *
cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type)
{
	cpupm_domain_t	*dom;
	id_t		did;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Instantiate the domain if it doesn't already exist
	 * and enumerate its power states.
	 */
	did = cpupm_domain_id(cp, type);
	dom = cpupm_domain_find(did, type);
	if (dom == NULL) {
		dom = cpupm_domain_create(did, type);
		cpupm_domain_state_enum(cp, dom);
	}

	/*
	 * Named state initialization
	 */
	if (type == CPUPM_DTYPE_ACTIVE) {
		/*
		 * For active power domains, the highest performance
		 * state is defined as the first state returned from
		 * the domain enumeration.
		 */
		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
		    &dom->cpd_states[0];
		dom->cpd_named_states[CPUPM_STATE_LOW_POWER] =
		    &dom->cpd_states[dom->cpd_nstates - 1];

		/*
		 * Begin by assuming the CPU is running at the max perf state.
		 */
		dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
	}

	return (dom);
}
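
/*
 * Illustrative example (hypothetical platform code): a CPU configuration
 * path might attach a newly configured CPU to its active power domain with:
 *
 *	ASSERT(MUTEX_HELD(&cpu_lock));
 *	dom = cpupm_domain_init(cp, CPUPM_DTYPE_ACTIVE);
 *
 * Note that cpupm_domain_init() is find-or-create: an existing domain with
 * the same (id, type) is returned as-is; otherwise a new one is created and
 * its power states are enumerated.
 */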

/*
 * Return the id associated with the given type of domain
 * to which cp belongs
 */
id_t
cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type)
{
	return (cpupm_plat_domain_id(cp, type));
}

/*
 * Initiate a state change for the specified domain on behalf of cp
 */
int
cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state)
{
	if (cpupm_plat_change_state(cp, state) < 0)
		return (-1);

	DTRACE_PROBE2(cpupm__change__state,
	    cpupm_domain_t *, dom,
	    cpupm_state_t *, state);

	dom->cpd_state = state;
	return (0);
}

/*
 * Interface into the CPU power manager to indicate a significant change
 * in utilization of the specified active power domain
 */
void
cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom,
    cpupm_util_event_t event)
{
	cpupm_state_t	*new_state = NULL;
	hrtime_t	last;

	if (cpupm_policy == CPUPM_POLICY_DISABLED) {
		return;
	}

	/*
	 * What follows is a simple elastic power state management policy.
	 *
	 * If the utilization has become non-zero, and the domain was
	 * previously at its lowest power state, then transition it
	 * to the highest state in the spirit of "race to idle".
	 *
	 * If the utilization has dropped to zero, then transition the
	 * domain to its lowest power state.
	 *
	 * Statistics are maintained to implement governors to reduce state
	 * transitions resulting from either transient work, or periods of
	 * transient idleness on the domain.
	 */
	switch (event) {
	case CPUPM_DOM_REMAIN_BUSY:

		/*
		 * We've received an event that the domain is running a thread
		 * that's made it to the end of its time slice. If we are at
		 * low power, then raise it. If the transient work governor
		 * is engaged, then remove it.
		 */
		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
			if (dom->cpd_tw_governed == B_TRUE) {
				dom->cpd_tw_governed = B_FALSE;
				dom->cpd_tw = 0;
			}
		}
		break;

	case CPUPM_DOM_BUSY_FROM_IDLE:
		last = dom->cpd_last_lower;
		dom->cpd_last_raise = now;

		DTRACE_PROBE3(cpupm__raise__req,
		    cpupm_domain_t *, dom,
		    hrtime_t, last,
		    hrtime_t, now);

		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {

			/*
			 * There's non-zero utilization, and the domain is
			 * running in the lower power state. Before we
			 * consider raising power, perform some bookkeeping
			 * for the transient idle governor.
			 */
			if (dom->cpd_ti_governed == B_FALSE) {
				if ((now - last) < cpupm_ti_predict_interval) {
					/*
					 * We're raising the domain power and
					 * we *just* lowered it. Consider
					 * this a mispredicted power state
					 * transition due to a transient
					 * idle period.
					 */
					if (++dom->cpd_ti >=
					    cpupm_mispredict_thresh) {
						/*
						 * There have been enough
						 * transient idle transitions
						 * to justify governing future
						 * lowering requests.
						 */
						dom->cpd_ti_governed = B_TRUE;
						dom->cpd_ti = 0;
						DTRACE_PROBE1(
						    cpupm__ti__governed,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted the last
					 * lowering.
					 */
					dom->cpd_ti = 0;
				}
			}
			if (dom->cpd_tw_governed == B_TRUE) {
				/*
				 * Raise requests are governed due to
				 * transient work.
				 */
				DTRACE_PROBE1(cpupm__raise__governed,
				    cpupm_domain_t *, dom);

				/*
				 * It's likely that we'll be governed for a
				 * while. If the transient idle governor is
				 * also in place, examine the preceding idle
				 * interval to see if that still makes sense.
				 */
				if (dom->cpd_ti_governed == B_TRUE &&
				    ((now - last) >=
				    cpupm_ti_predict_interval)) {
					if (++dom->cpd_ti >=
					    cpupm_mispredict_gov_thresh) {
						dom->cpd_ti_governed =
						    B_FALSE;
						dom->cpd_ti = 0;
					}
				}
				return;
			}
			/*
			 * Prepare to transition to the higher power state
			 */
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];

		} else if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {

			/*
			 * Utilization is non-zero, and we're already running
			 * in the higher power state. Take this opportunity to
			 * perform some bookkeeping if the last lowering
			 * request was governed.
			 */
			if (dom->cpd_ti_governed == B_TRUE) {
				if ((now - last) >=
				    cpupm_ti_predict_interval) {
					/*
					 * The domain is transient idle
					 * governed, and we mispredicted
					 * governing the last lowering request.
					 */
					if (++dom->cpd_ti >=
					    cpupm_mispredict_gov_thresh) {
						/*
						 * There have been enough
						 * non-transient idle periods
						 * to justify removing the
						 * governor.
						 */
						dom->cpd_ti_governed = B_FALSE;
						dom->cpd_ti = 0;
						DTRACE_PROBE1(
						    cpupm__ti__ungoverned,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * Correctly predicted governing the
					 * last lowering request.
					 */
					dom->cpd_ti = 0;
				}
			}
		}
		break;

	case CPUPM_DOM_IDLE_FROM_BUSY:
		last = dom->cpd_last_raise;
		dom->cpd_last_lower = now;

		DTRACE_PROBE3(cpupm__lower__req,
		    cpupm_domain_t *, dom,
		    hrtime_t, last,
		    hrtime_t, now);

		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {

			/*
			 * The domain is idle, and is running in the highest
			 * performance state. Before we consider lowering
			 * power, perform some bookkeeping for the transient
			 * work governor.
			 */
			if (dom->cpd_tw_governed == B_FALSE) {
				if ((now - last) < cpupm_tw_predict_interval) {
					/*
					 * We're lowering the domain power and
					 * we *just* raised it. Consider the
					 * last raise mispredicted due to
					 * transient work.
					 */
					if (++dom->cpd_tw >=
					    cpupm_mispredict_thresh) {
						/*
						 * There have been enough
						 * transient work transitions
						 * to justify governing future
						 * raising requests.
						 */
						dom->cpd_tw_governed = B_TRUE;
						dom->cpd_tw = 0;
						DTRACE_PROBE1(
						    cpupm__tw__governed,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted the last
					 * raise.
					 */
					dom->cpd_tw = 0;
				}
			}
			if (dom->cpd_ti_governed == B_TRUE) {
				/*
				 * Lowering requests are governed due to
				 * transient idleness.
				 */
				DTRACE_PROBE1(cpupm__lowering__governed,
				    cpupm_domain_t *, dom);

				/*
				 * It's likely that we'll be governed for a
				 * while. If the transient work governor is
				 * also in place, examine the preceding busy
				 * interval to see if that still makes sense.
				 */
				if (dom->cpd_tw_governed == B_TRUE &&
				    ((now - last) >=
				    cpupm_tw_predict_interval)) {
					if (++dom->cpd_tw >=
					    cpupm_mispredict_gov_thresh) {
						dom->cpd_tw_governed =
						    B_FALSE;
						dom->cpd_tw = 0;
					}
				}
				return;
			}

			/*
			 * Prepare to transition to a lower power state.
			 */
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_LOW_POWER];

		} else if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {

			/*
			 * The domain is idle, and we're already running in
			 * the lower power state. Take this opportunity to
			 * perform some bookkeeping if the last raising
			 * request was governed.
			 */
			if (dom->cpd_tw_governed == B_TRUE) {
				if ((now - last) >=
				    cpupm_tw_predict_interval) {
					/*
					 * The domain is transient work
					 * governed, and we mispredicted
					 * governing the last raising request.
					 */
					if (++dom->cpd_tw >=
					    cpupm_mispredict_gov_thresh) {
						/*
						 * There has been enough
						 * non-transient work to
						 * justify removing the
						 * governor.
						 */
						dom->cpd_tw_governed = B_FALSE;
						dom->cpd_tw = 0;
						DTRACE_PROBE1(
						    cpupm__tw__ungoverned,
						    cpupm_domain_t *, dom);
					}
				} else {
					/*
					 * We correctly predicted governing
					 * the last raise.
					 */
					dom->cpd_tw = 0;
				}
			}
		}
		break;
	}
	/*
	 * Change the power state.
	 * Not much is currently done if this doesn't succeed.
	 */
	if (new_state)
		(void) cpupm_change_state(cp, dom, new_state);
}
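
/*
 * Worked trace of the transient idle governor (illustrative numbers,
 * assuming cpupm_ti_predict_interval of 6ms and cpupm_mispredict_thresh
 * of 2):
 *
 *	t = 0ms: IDLE_FROM_BUSY -> domain lowered, cpd_last_lower = 0
 *	t = 2ms: BUSY_FROM_IDLE -> idle period (2ms) < 6ms, cpd_ti = 1,
 *		 domain raised
 *	t = 3ms: IDLE_FROM_BUSY -> domain lowered again
 *	t = 4ms: BUSY_FROM_IDLE -> idle period (1ms) < 6ms, cpd_ti = 2,
 *		 threshold reached: cpd_ti_governed = B_TRUE
 *
 * Subsequent lowering requests then return early (the domain stays at max
 * performance) until cpupm_mispredict_gov_thresh sufficiently long idle
 * periods indicate the idleness is no longer transient.
 */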

/*
 * Interface called by platforms to dynamically change the
 * MAX performance cpupm state
 */
void
cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
{
	cpupm_domain_t	*dom;
	id_t		did;
	cpupm_dtype_t	type = CPUPM_DTYPE_ACTIVE;
	boolean_t	change_state = B_FALSE;
	cpupm_state_t	*new_state = NULL;

	did = cpupm_domain_id(cp, type);
	mutex_enter(&cpu_lock);
	dom = cpupm_domain_find(did, type);
	mutex_exit(&cpu_lock);

	/*
	 * A lock could be used here to prevent the CPU's power state from
	 * changing while CPUPM_STATE_MAX_PERF is being redefined. Since
	 * events that redefine MAX_PERF are infrequent, it may not be worth
	 * overburdening this path with locks. In the worst case, for one
	 * cycle the power may not get changed to the required level.
	 */
	if (dom != NULL) {
		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
			change_state = B_TRUE;
		}

		/*
		 * If an out of range level is passed, use the lowest
		 * supported speed.
		 */
		if (max_perf_level >= dom->cpd_nstates &&
		    dom->cpd_nstates > 1) {
			max_perf_level = dom->cpd_nstates - 1;
		}

		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
		    &dom->cpd_states[max_perf_level];

		/*
		 * If the current state is MAX_PERF, change the current state
		 * to the new MAX_PERF.
		 */
		if (change_state) {
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
			if (new_state) {
				(void) cpupm_change_state(cp, dom, new_state);
			}
		}
	}
}
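
/*
 * Hypothetical usage (for illustration only): a platform thermal or power
 * capping driver could restrict a domain to its second enumerated speed
 * with:
 *
 *	cpupm_redefine_max_activepwr_state(cp, 1);
 *
 * after which CPUPM_STATE_MAX_PERF refers to cpd_states[1], and subsequent
 * "raise" requests target that capped state.
 */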

/*
 * Benchmark some power state transitions and use the transition latencies as
 * a basis for initializing parameters for the transient idle and transient
 * work governors.
 *
 * Returns 0 on success or -1 if the governor parameters could not be
 * initialized.
 */
static int
cpupm_governor_initialize(void)
{
	cpu_t		*cp = CPU;
	cpupm_domain_t	*dom;
	cpupm_state_t	*low, *high;
	id_t		did;
	hrtime_t	start, delta, deltas = 0;
	int		iterations;

	did = cpupm_domain_id(cp, CPUPM_DTYPE_ACTIVE);
	if (did == CPUPM_NO_DOMAIN)
		return (-1);

	dom = cpupm_domain_find(did, CPUPM_DTYPE_ACTIVE);
	if (dom == NULL)
		return (-1);

	low = dom->cpd_named_states[CPUPM_STATE_LOW_POWER];
	high = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];

	for (iterations = 0; iterations < CPUPM_BENCHMARK_ITERS;
	    iterations++) {

		/*
		 * Measure the amount of time it takes to transition the
		 * domain down to the lowest, and back to the highest power
		 * state.
		 */
		start = gethrtime_unscaled();
		(void) cpupm_change_state(cp, dom, low);
		(void) cpupm_change_state(cp, dom, high);
		delta = gethrtime_unscaled() - start;

		DTRACE_PROBE1(cpupm__benchmark__latency,
		    hrtime_t, delta);

		deltas += delta;
	}

	/*
	 * Figure the average latency, and tune the transient work and
	 * transient idle prediction intervals accordingly.
	 */
	delta = deltas / iterations;

	cpupm_ti_predict_interval = delta * cpupm_ti_gov_multiple;
	cpupm_tw_predict_interval = delta * cpupm_tw_gov_multiple;

	return (0);
}

/*
 * Initiate a state change in all CPUPM domain instances of the specified type
 */
static void
cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state)
{
	cpu_t		*cp;
	pg_cmt_t	*pwr_pg;
	cpupm_domain_t	*dom;
	group_t		*hwset;
	group_iter_t	giter;
	pg_cpu_itr_t	cpu_iter;
	pghw_type_t	hw;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (type) {
	case CPUPM_DTYPE_ACTIVE:
		hw = PGHW_POW_ACTIVE;
		break;
	default:
		/*
		 * Power domain types other than "active" are unsupported.
		 */
		ASSERT(type == CPUPM_DTYPE_ACTIVE);
		return;
	}

	if ((hwset = pghw_set_lookup(hw)) == NULL)
		return;

	/*
	 * Iterate over the power domains
	 */
	group_iter_init(&giter);
	while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) {

		dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle;

		/*
		 * Iterate over the CPUs in each domain
		 */
		PG_CPU_ITR_INIT(pwr_pg, cpu_iter);
		while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
			(void) cpupm_change_state(cp, dom,
			    dom->cpd_named_states[state]);
		}
	}
}