1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cpu_pm.h> 27 #include <sys/x86_archext.h> 28 #include <sys/sdt.h> 29 #include <sys/spl.h> 30 #include <sys/machsystm.h> 31 #include <sys/hpet.h> 32 #include <sys/cpupm.h> 33 #include <sys/cpu_idle.h> 34 #include <sys/cpu_acpi.h> 35 #include <sys/cpupm_throttle.h> 36 #include <sys/dtrace.h> 37 38 /* 39 * This callback is used to build the PPM CPU domains once 40 * all the CPU devices have been started. The callback is 41 * initialized by the PPM driver to point to a routine that 42 * will build the domains. 43 */ 44 void (*cpupm_rebuild_cpu_domains)(void); 45 46 /* 47 * This callback is used to reset the topspeed for all the 48 * CPU devices. The callback is initialized by the PPM driver to 49 * point to a routine that will reinitialize all the CPU devices 50 * once all the CPU devices have been started and the CPU domains 51 * built. 52 */ 53 void (*cpupm_init_topspeed)(void); 54 55 /* 56 * This callback is used to redefine the topspeed for a CPU device. 
 * Since all CPUs in a domain should have identical properties, this
 * callback is initialized by the PPM driver to point to a routine
 * that will redefine the topspeed for all devices in a CPU domain.
 * This callback is exercised whenever an ACPI _PPC change notification
 * is received by the CPU driver.
 */
void (*cpupm_redefine_topspeed)(void *);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to set a new topspeed for a CPU.
 */
void (*cpupm_set_topspeed_callb)(void *, int);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 */
int (*cpupm_get_topspeed_callb)(void *);

static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
static void cpupm_free_notify_handlers(cpu_t *);

/*
 * Until proven otherwise, all power states are manageable.
 */
static uint32_t cpupm_enabled = CPUPM_ALL_STATES;

/*
 * Until all CPUs have started, we do not allow
 * power management.
 */
static boolean_t cpupm_ready = B_FALSE;

cpupm_state_domains_t *cpupm_pstate_domains = NULL;
cpupm_state_domains_t *cpupm_tstate_domains = NULL;
cpupm_state_domains_t *cpupm_cstate_domains = NULL;

/*
 * c-state tunables
 *
 * cpupm_cs_sample_tunable is the number of samples in a decision period.
 *
 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
 * divided by time spent in the idle state transitions.
 * A value of 10 means the CPU will not spend more than 1/10 of its time
 * in idle latency. The worst case performance will be 90% of non Deep C-state
 * kernel.
 *
 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
 * before it is worth going there. Expressed as a multiple of latency.
106 */ 107 uint32_t cpupm_cs_sample_tunable = 5; /* samples in decision period */ 108 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 109 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 110 uint16_t cpupm_C2_idle_pct_tunable = 70; 111 uint16_t cpupm_C3_idle_pct_tunable = 80; 112 113 #ifndef __xpv 114 extern boolean_t cpupm_intel_init(cpu_t *); 115 extern boolean_t cpupm_amd_init(cpu_t *); 116 117 typedef struct cpupm_vendor { 118 boolean_t (*cpuv_init)(cpu_t *); 119 } cpupm_vendor_t; 120 121 /* 122 * Table of supported vendors. 123 */ 124 static cpupm_vendor_t cpupm_vendors[] = { 125 cpupm_intel_init, 126 cpupm_amd_init, 127 NULL 128 }; 129 #endif 130 131 /* 132 * Initialize the machine. 133 * See if a module exists for managing power for this CPU. 134 */ 135 /*ARGSUSED*/ 136 void 137 cpupm_init(cpu_t *cp) 138 { 139 #ifndef __xpv 140 cpupm_vendor_t *vendors; 141 cpupm_mach_state_t *mach_state; 142 struct machcpu *mcpu = &(cp->cpu_m); 143 int *speeds; 144 uint_t nspeeds; 145 int ret; 146 147 cpupm_set_supp_freqs(cp, NULL, 1); 148 149 mach_state = cp->cpu_m.mcpu_pm_mach_state = 150 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 151 mach_state->ms_caps = CPUPM_NO_STATES; 152 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 153 154 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 155 if (mach_state->ms_acpi_handle == NULL) { 156 cpupm_free(cp); 157 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 158 "unable to get ACPI handle", cp->cpu_id); 159 cmn_err(CE_NOTE, "!CPU power management will not function."); 160 CPUPM_DISABLE(); 161 return; 162 } 163 164 /* 165 * Loop through the CPU management module table and see if 166 * any of the modules implement CPU power management 167 * for this CPU. 168 */ 169 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 170 if (vendors->cpuv_init(cp)) 171 break; 172 } 173 174 /* 175 * Nope, we can't power manage this CPU. 
176 */ 177 if (vendors == NULL) { 178 cpupm_free(cp); 179 CPUPM_DISABLE(); 180 return; 181 } 182 183 /* 184 * If P-state support exists for this system, then initialize it. 185 */ 186 if (mach_state->ms_pstate.cma_ops != NULL) { 187 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 188 if (ret != 0) { 189 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 190 " unable to initialize P-state support", 191 cp->cpu_id); 192 mach_state->ms_pstate.cma_ops = NULL; 193 cpupm_disable(CPUPM_P_STATES); 194 } else { 195 nspeeds = cpupm_get_speeds(cp, &speeds); 196 if (nspeeds == 0) { 197 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 198 " no speeds to manage", cp->cpu_id); 199 } else { 200 cpupm_set_supp_freqs(cp, speeds, nspeeds); 201 cpupm_free_speeds(speeds, nspeeds); 202 mach_state->ms_caps |= CPUPM_P_STATES; 203 } 204 } 205 } 206 207 if (mach_state->ms_tstate.cma_ops != NULL) { 208 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 209 if (ret != 0) { 210 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 211 " unable to initialize T-state support", 212 cp->cpu_id); 213 mach_state->ms_tstate.cma_ops = NULL; 214 cpupm_disable(CPUPM_T_STATES); 215 } else { 216 mach_state->ms_caps |= CPUPM_T_STATES; 217 } 218 } 219 220 /* 221 * If C-states support exists for this system, then initialize it. 
222 */ 223 if (mach_state->ms_cstate.cma_ops != NULL) { 224 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 225 if (ret != 0) { 226 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 227 " unable to initialize C-state support", 228 cp->cpu_id); 229 mach_state->ms_cstate.cma_ops = NULL; 230 mcpu->max_cstates = CPU_ACPI_C1; 231 cpupm_disable(CPUPM_C_STATES); 232 idle_cpu = non_deep_idle_cpu; 233 disp_enq_thread = non_deep_idle_disp_enq_thread; 234 } else if (cpu_deep_cstates_supported()) { 235 mcpu->max_cstates = cpu_acpi_get_max_cstates( 236 mach_state->ms_acpi_handle); 237 if (mcpu->max_cstates > CPU_ACPI_C1) { 238 hpet.callback(CST_EVENT_MULTIPLE_CSTATES); 239 CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 240 mcpu->mcpu_idle_type = CPU_ACPI_C1; 241 disp_enq_thread = cstate_wakeup; 242 } else { 243 hpet.callback(CST_EVENT_ONE_CSTATE); 244 } 245 mach_state->ms_caps |= CPUPM_C_STATES; 246 } else { 247 mcpu->max_cstates = CPU_ACPI_C1; 248 idle_cpu = non_deep_idle_cpu; 249 disp_enq_thread = non_deep_idle_disp_enq_thread; 250 } 251 } 252 253 254 if (mach_state->ms_caps == CPUPM_NO_STATES) { 255 cpupm_free(cp); 256 CPUPM_DISABLE(); 257 return; 258 } 259 260 if ((mach_state->ms_caps & CPUPM_T_STATES) || 261 (mach_state->ms_caps & CPUPM_P_STATES) || 262 (mach_state->ms_caps & CPUPM_C_STATES)) 263 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 264 #endif 265 } 266 267 /* 268 * Free any resources allocated by cpupm_init(). 
269 */ 270 /*ARGSUSED*/ 271 void 272 cpupm_free(cpu_t *cp) 273 { 274 #ifndef __xpv 275 cpupm_mach_state_t *mach_state = 276 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 277 278 if (mach_state == NULL) 279 return; 280 if (mach_state->ms_pstate.cma_ops != NULL) { 281 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 282 mach_state->ms_pstate.cma_ops = NULL; 283 } 284 285 if (mach_state->ms_tstate.cma_ops != NULL) { 286 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 287 mach_state->ms_tstate.cma_ops = NULL; 288 } 289 290 if (mach_state->ms_cstate.cma_ops != NULL) { 291 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 292 mach_state->ms_cstate.cma_ops = NULL; 293 } 294 295 cpupm_free_notify_handlers(cp); 296 297 if (mach_state->ms_acpi_handle != NULL) { 298 cpu_acpi_fini(mach_state->ms_acpi_handle); 299 mach_state->ms_acpi_handle = NULL; 300 } 301 302 mutex_destroy(&mach_state->ms_lock); 303 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 304 cp->cpu_m.mcpu_pm_mach_state = NULL; 305 #endif 306 } 307 308 /* 309 * If all CPUs have started and at least one power state is manageable, 310 * then the CPUs are ready for power management. 311 */ 312 boolean_t 313 cpupm_is_ready() 314 { 315 #ifndef __xpv 316 if (cpupm_enabled == CPUPM_NO_STATES) 317 return (B_FALSE); 318 return (cpupm_ready); 319 #else 320 return (B_FALSE); 321 #endif 322 323 } 324 325 boolean_t 326 cpupm_is_enabled(uint32_t state) 327 { 328 return ((cpupm_enabled & state) == state); 329 } 330 331 /* 332 * By default, all states are enabled. 
333 */ 334 void 335 cpupm_disable(uint32_t state) 336 { 337 338 if (state & CPUPM_P_STATES) { 339 cpupm_free_domains(&cpupm_pstate_domains); 340 } 341 if (state & CPUPM_T_STATES) { 342 cpupm_free_domains(&cpupm_tstate_domains); 343 } 344 if (state & CPUPM_C_STATES) { 345 cpupm_free_domains(&cpupm_cstate_domains); 346 } 347 cpupm_enabled &= ~state; 348 } 349 350 /* 351 * Once all CPUs have been started, the PPM driver should build CPU 352 * domains and initialize the topspeed for all CPU devices. 353 */ 354 void 355 cpupm_post_startup() 356 { 357 #ifndef __xpv 358 /* 359 * The CPU domain built by the PPM during CPUs attaching 360 * should be rebuilt with the information retrieved from 361 * ACPI. 362 */ 363 if (cpupm_rebuild_cpu_domains != NULL) 364 (*cpupm_rebuild_cpu_domains)(); 365 366 /* 367 * Only initialize the topspeed if P-states are enabled. 368 */ 369 if (cpupm_enabled & CPUPM_P_STATES && cpupm_init_topspeed != NULL) 370 (*cpupm_init_topspeed)(); 371 #endif 372 cpupm_ready = B_TRUE; 373 } 374 375 /* 376 * Allocate power domains for C,P and T States 377 */ 378 void 379 cpupm_alloc_domains(cpu_t *cp, int state) 380 { 381 cpupm_mach_state_t *mach_state = 382 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 383 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 384 cpupm_state_domains_t **dom_ptr; 385 cpupm_state_domains_t *dptr; 386 cpupm_state_domains_t **mach_dom_state_ptr; 387 uint32_t domain; 388 uint32_t type; 389 390 switch (state) { 391 case CPUPM_P_STATES: 392 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 393 domain = CPU_ACPI_PSD(handle).sd_domain; 394 type = CPU_ACPI_PSD(handle).sd_type; 395 } else { 396 mutex_enter(&cpu_lock); 397 domain = cpuid_get_chipid(cp); 398 mutex_exit(&cpu_lock); 399 type = CPU_ACPI_HW_ALL; 400 } 401 dom_ptr = &cpupm_pstate_domains; 402 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 403 break; 404 case CPUPM_T_STATES: 405 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 406 domain = 
CPU_ACPI_TSD(handle).sd_domain; 407 type = CPU_ACPI_TSD(handle).sd_type; 408 } else { 409 mutex_enter(&cpu_lock); 410 domain = cpuid_get_chipid(cp); 411 mutex_exit(&cpu_lock); 412 type = CPU_ACPI_HW_ALL; 413 } 414 dom_ptr = &cpupm_tstate_domains; 415 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 416 break; 417 case CPUPM_C_STATES: 418 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 419 domain = CPU_ACPI_CSD(handle).sd_domain; 420 type = CPU_ACPI_CSD(handle).sd_type; 421 } else { 422 mutex_enter(&cpu_lock); 423 domain = cpuid_get_coreid(cp); 424 mutex_exit(&cpu_lock); 425 type = CPU_ACPI_HW_ALL; 426 } 427 dom_ptr = &cpupm_cstate_domains; 428 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 429 break; 430 default: 431 return; 432 } 433 434 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 435 if (dptr->pm_domain == domain) 436 break; 437 } 438 439 /* new domain is created and linked at the head */ 440 if (dptr == NULL) { 441 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 442 dptr->pm_domain = domain; 443 dptr->pm_type = type; 444 dptr->pm_next = *dom_ptr; 445 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 446 (void *)ipltospl(DISP_LEVEL)); 447 CPUSET_ZERO(dptr->pm_cpus); 448 *dom_ptr = dptr; 449 } 450 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 451 *mach_dom_state_ptr = dptr; 452 } 453 454 /* 455 * Free C, P or T state power domains 456 */ 457 void 458 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 459 { 460 cpupm_state_domains_t *this_domain, *next_domain; 461 462 this_domain = *dom_ptr; 463 while (this_domain != NULL) { 464 next_domain = this_domain->pm_next; 465 mutex_destroy(&this_domain->pm_lock); 466 kmem_free((void *)this_domain, 467 sizeof (cpupm_state_domains_t)); 468 this_domain = next_domain; 469 } 470 *dom_ptr = NULL; 471 } 472 473 void 474 cpupm_alloc_ms_cstate(cpu_t *cp) 475 { 476 cpupm_mach_state_t *mach_state; 477 cpupm_mach_acpi_state_t *ms_cstate; 478 479 mach_state = (cpupm_mach_state_t 
*)(cp->cpu_m.mcpu_pm_mach_state); 480 ms_cstate = &mach_state->ms_cstate; 481 ASSERT(ms_cstate->cma_state.cstate == NULL); 482 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 483 KM_SLEEP); 484 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 485 } 486 487 void 488 cpupm_free_ms_cstate(cpu_t *cp) 489 { 490 cpupm_mach_state_t *mach_state = 491 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 492 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 493 494 if (ms_cstate->cma_state.cstate != NULL) { 495 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 496 ms_cstate->cma_state.cstate = NULL; 497 } 498 } 499 500 void 501 cpupm_state_change(cpu_t *cp, int level, int state) 502 { 503 cpupm_mach_state_t *mach_state = 504 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 505 cpupm_state_ops_t *state_ops; 506 cpupm_state_domains_t *state_domain; 507 cpuset_t set; 508 509 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 510 511 if (mach_state == NULL) { 512 return; 513 } 514 515 switch (state) { 516 case CPUPM_P_STATES: 517 state_ops = mach_state->ms_pstate.cma_ops; 518 state_domain = mach_state->ms_pstate.cma_domain; 519 break; 520 case CPUPM_T_STATES: 521 state_ops = mach_state->ms_tstate.cma_ops; 522 state_domain = mach_state->ms_tstate.cma_domain; 523 break; 524 default: 525 break; 526 } 527 528 switch (state_domain->pm_type) { 529 case CPU_ACPI_SW_ANY: 530 /* 531 * A request on any CPU in the domain transitions the domain 532 */ 533 CPUSET_ONLY(set, cp->cpu_id); 534 state_ops->cpus_change(set, level); 535 break; 536 case CPU_ACPI_SW_ALL: 537 /* 538 * All CPUs in the domain must request the transition 539 */ 540 case CPU_ACPI_HW_ALL: 541 /* 542 * P/T-state transitions are coordinated by the hardware 543 * For now, request the transition on all CPUs in the domain, 544 * but looking ahead we can probably be smarter about this. 
545 */ 546 mutex_enter(&state_domain->pm_lock); 547 state_ops->cpus_change(state_domain->pm_cpus, level); 548 mutex_exit(&state_domain->pm_lock); 549 break; 550 default: 551 cmn_err(CE_WARN, "Unknown domain coordination type: %d", 552 state_domain->pm_type); 553 } 554 } 555 556 /* 557 * CPU PM interfaces exposed to the CPU power manager 558 */ 559 /*ARGSUSED*/ 560 id_t 561 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 562 { 563 cpupm_mach_state_t *mach_state = 564 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 565 566 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 567 !cpupm_is_enabled(CPUPM_C_STATES))) { 568 return (CPUPM_NO_DOMAIN); 569 } 570 if (type == CPUPM_DTYPE_ACTIVE) { 571 /* 572 * Return P-State domain for the specified CPU 573 */ 574 if (mach_state->ms_pstate.cma_domain) { 575 return (mach_state->ms_pstate.cma_domain->pm_domain); 576 } 577 } else if (type == CPUPM_DTYPE_IDLE) { 578 /* 579 * Return C-State domain for the specified CPU 580 */ 581 if (mach_state->ms_cstate.cma_domain) { 582 return (mach_state->ms_cstate.cma_domain->pm_domain); 583 } 584 } 585 return (CPUPM_NO_DOMAIN); 586 } 587 588 /*ARGSUSED*/ 589 uint_t 590 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 591 cpupm_state_t *states) 592 { 593 int *speeds; 594 uint_t nspeeds, i; 595 596 /* 597 * Idle domain support unimplemented 598 */ 599 if (type != CPUPM_DTYPE_ACTIVE) { 600 return (0); 601 } 602 nspeeds = cpupm_get_speeds(cp, &speeds); 603 604 /* 605 * If the caller passes NULL for states, just return the 606 * number of states. 
607 */ 608 if (states != NULL) { 609 for (i = 0; i < nspeeds; i++) { 610 states[i].cps_speed = speeds[i]; 611 states[i].cps_handle = (cpupm_handle_t)i; 612 } 613 } 614 cpupm_free_speeds(speeds, nspeeds); 615 return (nspeeds); 616 } 617 618 /*ARGSUSED*/ 619 int 620 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 621 { 622 if (!cpupm_is_ready()) 623 return (-1); 624 625 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 626 627 return (0); 628 } 629 630 /*ARGSUSED*/ 631 /* 632 * Note: It is the responsibility of the users of 633 * cpupm_get_speeds() to free the memory allocated 634 * for speeds using cpupm_free_speeds() 635 */ 636 uint_t 637 cpupm_get_speeds(cpu_t *cp, int **speeds) 638 { 639 #ifndef __xpv 640 cpupm_mach_state_t *mach_state = 641 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 642 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 643 #else 644 return (0); 645 #endif 646 } 647 648 /*ARGSUSED*/ 649 void 650 cpupm_free_speeds(int *speeds, uint_t nspeeds) 651 { 652 #ifndef __xpv 653 cpu_acpi_free_speeds(speeds, nspeeds); 654 #endif 655 } 656 657 /* 658 * All CPU instances have been initialized successfully. 659 */ 660 boolean_t 661 cpupm_power_ready(void) 662 { 663 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready()); 664 } 665 666 /* 667 * All CPU instances have been initialized successfully. 668 */ 669 boolean_t 670 cpupm_throttle_ready(void) 671 { 672 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready()); 673 } 674 675 /* 676 * All CPU instances have been initialized successfully. 
 */
boolean_t
cpupm_cstate_ready(void)
{
	return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready());
}

/*
 * Top-level ACPI notification handler.  Fans the event out to every
 * handler registered via cpupm_add_notify_handler(), under ms_lock.
 */
void
cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_notification_t *entry;

	mutex_enter(&mach_state->ms_lock);
	for (entry = mach_state->ms_handlers; entry != NULL;
	    entry = entry->nq_next) {
		entry->nq_handler(obj, val, entry->nq_ctx);
	}
	mutex_exit(&mach_state->ms_lock);
}

/*
 * Register a notification handler for this CPU.  The ACPI-level
 * handler (cpupm_notify_handler) is installed exactly once, when the
 * first entry goes onto the list; later entries are simply pushed on
 * the front of the list.
 */
/*ARGSUSED*/
void
cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;

	entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
	entry->nq_handler = handler;
	entry->nq_ctx = ctx;
	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		entry->nq_next = NULL;
		mach_state->ms_handlers = entry;
		cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler, cp);

	} else {
		entry->nq_next = mach_state->ms_handlers;
		mach_state->ms_handlers = entry;
	}
	mutex_exit(&mach_state->ms_lock);
#endif
}

/*
 * Remove the ACPI-level notify handler (if any was installed) and
 * free the whole per-CPU handler list.  Called from cpupm_free().
 */
/*ARGSUSED*/
static void
cpupm_free_notify_handlers(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;
	cpupm_notification_t *next;

	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		mutex_exit(&mach_state->ms_lock);
		return;
	}
	/* Detach from ACPI first so no new notifications arrive. */
	if (mach_state->ms_acpi_handle != NULL) {
		cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler);
	}
	entry = mach_state->ms_handlers;
	while (entry != NULL) {
		next = entry->nq_next;
		kmem_free(entry, sizeof (cpupm_notification_t));
		entry = next;
	}
	mach_state->ms_handlers = NULL;
	mutex_exit(&mach_state->ms_lock);
#endif
}

/*
 * Get the current max speed from the ACPI _PPC object
 */
/*ARGSUSED*/
int
cpupm_get_top_speed(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state;
	cpu_acpi_handle_t handle;
	int plat_level;
	uint_t nspeeds;
	int max_level;

	mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;

	/* Re-read _PPC; its value is dynamic (see comment below). */
	cpu_acpi_cache_ppc(handle);
	plat_level = CPU_ACPI_PPC(handle);

	nspeeds = CPU_ACPI_PSTATES_COUNT(handle);

	/* Clamp out-of-range _PPC values to index 0. */
	max_level = nspeeds - 1;
	if ((plat_level < 0) || (plat_level > max_level)) {
		cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
		    "_PPC out of range %d", cp->cpu_id, plat_level);
		plat_level = 0;
	}

	return (plat_level);
#else
	return (0);
#endif
}

/*
 * This notification handler is called whenever the ACPI _PPC
 * object changes. The _PPC is a sort of governor on power levels.
 * It sets an upper threshold on which, _PSS defined, power levels
 * are usable. The _PPC value is dynamic and may change as properties
 * (i.e., thermal or AC source) of the system change.
 */

static void
cpupm_power_manage_notifications(void *ctx)
{
	cpu_t *cp = ctx;
	int top_speed;

	top_speed = cpupm_get_top_speed(cp);
	cpupm_redefine_max_activepwr_state(cp, top_speed);
}

/*
 * Dispatch an ACPI change notification to the appropriate manager.
 */
/* ARGSUSED */
static void
cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
#ifndef __xpv
	/*
	 * Currently, we handle _TPC, _CST and _PPC change notifications.
	 */
	if (val == CPUPM_TPC_CHANGE_NOTIFICATION) {
		cpupm_throttle_manage_notification(ctx);
	} else if (val == CPUPM_CST_CHANGE_NOTIFICATION) {
		cpuidle_manage_cstates(ctx);
	} else if (val == CPUPM_PPC_CHANGE_NOTIFICATION) {
		cpupm_power_manage_notifications(ctx);
	}
#endif
}

/*
 * Update cpupm cstate data each time CPU exits idle.
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	cs_data->cs_idle_exit = end;
}

/*
 * Determine next cstate based on cpupm data.
 * Update cpupm cstate data each time CPU goes idle.
 * Do as much as possible in the idle state bookkeeping function because the
 * performance impact while idle is minimal compared to in the wakeup function
 * when there is real work to do.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
    uint32_t cs_count, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;
	uint32_t i;

	/* Accumulate (scaled) time spent idle since cs_idle_enter. */
	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	++cs_data->cs_cnt;
	if (cs_data->cs_cnt > cpupm_cs_sample_tunable) {
		/* End of a sample period: recompute the idle percentage. */
		cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
		scalehrtime(&cs_data->cs_smpl_len);
		cs_data->cs_smpl_len |= 1;	/* protect from DIV 0 */
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy
		 * The cpu_acpi_cstate_t *cstates array is not required to
		 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
		 * There are cs_count entries in the cstates array.
		 * cs_data->cs_next_cstate contains the index of the next
		 * C-state this CPU should enter.
		 */
		ASSERT(cstates[0].cs_type == CPU_ACPI_C1);

		/*
		 * Will CPU be idle long enough to save power?
		 *
		 * NOTE(review): the /1000 presumably converts the ns-based
		 * hrtime average to the units of cs_latency (us) — confirm
		 * against cpu_acpi_cstate_t.
		 */
		ave_idle_time = (cs_data->cs_smpl_idle /
		    cpupm_cs_sample_tunable) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_idle_time < (cstates[i].cs_latency *
			    cpupm_cs_idle_save_tunable)) {
				/* Too shallow an idle for state i or deeper */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, i);
			}
		}

		/*
		 * Wakeup often (even when non-idle time is very short)?
		 * Some producer/consumer type loads fall into this category.
		 */
		ave_interval = (cs_data->cs_smpl_len / cpupm_cs_sample_tunable)
		    / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_interval <= (cstates[i].cs_latency *
			    cpupm_cs_idle_cost_tunable)) {
				/* Latency cost too high for state i or deeper */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, (CPU_MAX_CSTATES + i));
			}
		}

		/*
		 * Idle percent
		 */
		for (i = 1; i < cs_count; ++i) {
			switch (cstates[i].cs_type) {
			case CPU_ACPI_C2:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C2_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;

			case CPU_ACPI_C3:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C3_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;
			}
		}

		/* Deepest C-state index that survived all three filters. */
		cs_data->cs_next_cstate = cs_count - 1;
	}

	return (cs_data->cs_next_cstate);
}