/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpu_pm.h>
#include <sys/x86_archext.h>
#include <sys/sdt.h>
#include <sys/spl.h>
#include <sys/machsystm.h>
#include <sys/hpet.h>
#include <sys/cpupm.h>
#include <sys/cpu_idle.h>
#include <sys/cpu_acpi.h>
#include <sys/cpupm_throttle.h>

/*
 * This callback is used to build the PPM CPU domains once
 * all the CPU devices have been started. The callback is
 * initialized by the PPM driver to point to a routine that
 * will build the domains.
 */
void (*cpupm_rebuild_cpu_domains)(void);

/*
 * This callback is used to reset the topspeed for all the
 * CPU devices. The callback is initialized by the PPM driver to
 * point to a routine that will reinitialize all the CPU devices
 * once all the CPU devices have been started and the CPU domains
 * built.
 */
void (*cpupm_init_topspeed)(void);

/*
 * This callback is used to redefine the topspeed for a CPU device.
 * Since all CPUs in a domain should have identical properties, this
 * callback is initialized by the PPM driver to point to a routine
 * that will redefine the topspeed for all devices in a CPU domain.
 * This callback is exercised whenever an ACPI _PPC change notification
 * is received by the CPU driver.
 */
void (*cpupm_redefine_topspeed)(void *);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to set a new topspeed for a CPU.
 */
void (*cpupm_set_topspeed_callb)(void *, int);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 */
int (*cpupm_get_topspeed_callb)(void *);

static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
static void cpupm_free_notify_handlers(cpu_t *);

/*
 * Until proven otherwise, all power states are manageable.
 */
static uint32_t cpupm_enabled = CPUPM_ALL_STATES;

/*
 * Until all CPUs have started, we do not allow
 * power management.
 */
static boolean_t cpupm_ready = B_FALSE;

cpupm_state_domains_t *cpupm_pstate_domains = NULL;
cpupm_state_domains_t *cpupm_tstate_domains = NULL;
cpupm_state_domains_t *cpupm_cstate_domains = NULL;

/*
 * C-state tunables
 *
 * cpupm_cs_idle_cost_tunable is the ratio of the time the CPU spends
 * executing plus idle to the time spent transitioning into and out of
 * idle states. A value of 10 means the CPU will not spend more than
 * 1/10 of its time in idle-transition latency; worst-case performance
 * is therefore 90% of a kernel that does not use deep C-states.
 *
 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
 * before it is worth going there, expressed as a multiple of that state's
 * transition latency.
 */
uint32_t cpupm_cs_sample_tunable = 5;		/* samples in decision period */
uint32_t cpupm_cs_idle_cost_tunable = 10;	/* work time / latency cost */
uint32_t cpupm_cs_idle_save_tunable = 2;	/* idle power savings */
uint16_t cpupm_C2_idle_pct_tunable = 70;
uint16_t cpupm_C3_idle_pct_tunable = 80;
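
/*
 * Worked example of the tunables above (editorial sketch, illustrative
 * numbers only): with cpupm_cs_idle_cost_tunable = 10, a deep C-state is
 * rejected unless the average wakeup interval exceeds ten times that
 * state's transition latency, so at most 1/10 of the CPU's time can be
 * burned entering and leaving the state. With
 * cpupm_cs_idle_save_tunable = 2, the average idle stretch must be at
 * least twice the state's latency before the deeper state is chosen.
 */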

#ifndef __xpv
extern boolean_t cpupm_intel_init(cpu_t *);
extern boolean_t cpupm_amd_init(cpu_t *);

typedef struct cpupm_vendor {
	boolean_t	(*cpuv_init)(cpu_t *);
} cpupm_vendor_t;

/*
 * Table of supported vendors.
 */
static cpupm_vendor_t cpupm_vendors[] = {
	cpupm_intel_init,
	cpupm_amd_init,
	NULL
};
#endif
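
/*
 * Illustrative sketch (hypothetical, not part of this file) of the
 * contract a vendor entry must satisfy: a cpuv_init() routine claims a
 * CPU by installing its cma_ops vectors and returning B_TRUE, and
 * declines it by returning B_FALSE so the probe loop below can try the
 * next entry. X86_VENDOR_Example is a placeholder name.
 *
 *	boolean_t
 *	cpupm_example_init(cpu_t *cp)
 *	{
 *		if (cpuid_getvendor(cp) != X86_VENDOR_Example)
 *			return (B_FALSE);
 *		... install ms_pstate/ms_tstate/ms_cstate cma_ops ...
 *		return (B_TRUE);
 *	}
 */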

/*
 * Initialize the machine.
 * See if a module exists for managing power for this CPU.
 */
/*ARGSUSED*/
void
cpupm_init(cpu_t *cp)
{
#ifndef __xpv
	cpupm_vendor_t *vendors;
	cpupm_mach_state_t *mach_state;
	struct machcpu *mcpu = &(cp->cpu_m);
	int *speeds;
	uint_t nspeeds;
	int ret;

	cpupm_set_supp_freqs(cp, NULL, 1);

	mach_state = cp->cpu_m.mcpu_pm_mach_state =
	    kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP);
	mach_state->ms_caps = CPUPM_NO_STATES;
	mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL);

	mach_state->ms_acpi_handle = cpu_acpi_init(cp);
	if (mach_state->ms_acpi_handle == NULL) {
		cpupm_free(cp);
		cmn_err(CE_WARN, "!cpupm_init: processor %d: "
		    "unable to get ACPI handle", cp->cpu_id);
		cmn_err(CE_NOTE, "!CPU power management will not function.");
		CPUPM_DISABLE();
		return;
	}

	/*
	 * Loop through the CPU management module table and see if
	 * any of the modules implement CPU power management
	 * for this CPU.
	 */
	for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) {
		if (vendors->cpuv_init(cp))
			break;
	}

	/*
	 * Nope, we can't power manage this CPU: the probe loop ran off
	 * the end of the table and vendors now points at the NULL
	 * terminating entry (vendors itself is never NULL).
	 */
	if (vendors->cpuv_init == NULL) {
		cpupm_free(cp);
		CPUPM_DISABLE();
		return;
	}

	/*
	 * If P-state support exists for this system, then initialize it.
	 */
	if (mach_state->ms_pstate.cma_ops != NULL) {
		ret = mach_state->ms_pstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			cmn_err(CE_WARN, "!cpupm_init: processor %d:"
			    " unable to initialize P-state support",
			    cp->cpu_id);
			mach_state->ms_pstate.cma_ops = NULL;
			cpupm_disable(CPUPM_P_STATES);
		} else {
			nspeeds = cpupm_get_speeds(cp, &speeds);
			if (nspeeds == 0) {
				cmn_err(CE_WARN, "!cpupm_init: processor %d:"
				    " no speeds to manage", cp->cpu_id);
			} else {
				cpupm_set_supp_freqs(cp, speeds, nspeeds);
				cpupm_free_speeds(speeds, nspeeds);
				mach_state->ms_caps |= CPUPM_P_STATES;
			}
		}
	}

	/*
	 * If T-state support exists for this system, then initialize it.
	 */
	if (mach_state->ms_tstate.cma_ops != NULL) {
		ret = mach_state->ms_tstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			cmn_err(CE_WARN, "!cpupm_init: processor %d:"
			    " unable to initialize T-state support",
			    cp->cpu_id);
			mach_state->ms_tstate.cma_ops = NULL;
			cpupm_disable(CPUPM_T_STATES);
		} else {
			mach_state->ms_caps |= CPUPM_T_STATES;
		}
	}

	/*
	 * If C-state support exists for this system, then initialize it.
	 */
	if (mach_state->ms_cstate.cma_ops != NULL) {
		ret = mach_state->ms_cstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			cmn_err(CE_WARN, "!cpupm_init: processor %d:"
			    " unable to initialize C-state support",
			    cp->cpu_id);
			mach_state->ms_cstate.cma_ops = NULL;
			mcpu->max_cstates = CPU_ACPI_C1;
			cpupm_disable(CPUPM_C_STATES);
			idle_cpu = non_deep_idle_cpu;
			disp_enq_thread = non_deep_idle_disp_enq_thread;
		} else if (cpu_deep_cstates_supported()) {
			mcpu->max_cstates = cpu_acpi_get_max_cstates(
			    mach_state->ms_acpi_handle);
			if (mcpu->max_cstates > CPU_ACPI_C1) {
				hpet.callback(CST_EVENT_MULTIPLE_CSTATES);
				CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
				mcpu->mcpu_idle_type = CPU_ACPI_C1;
				disp_enq_thread = cstate_wakeup;
			} else {
				hpet.callback(CST_EVENT_ONE_CSTATE);
			}
			mach_state->ms_caps |= CPUPM_C_STATES;
		} else {
			mcpu->max_cstates = CPU_ACPI_C1;
			idle_cpu = non_deep_idle_cpu;
			disp_enq_thread = non_deep_idle_disp_enq_thread;
		}
	}

	if (mach_state->ms_caps == CPUPM_NO_STATES) {
		cpupm_free(cp);
		CPUPM_DISABLE();
		return;
	}

	if ((mach_state->ms_caps & CPUPM_T_STATES) ||
	    (mach_state->ms_caps & CPUPM_P_STATES) ||
	    (mach_state->ms_caps & CPUPM_C_STATES))
		cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp);
#endif
}

/*
 * Free any resources allocated by cpupm_init().
 */
/*ARGSUSED*/
void
cpupm_free(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;

	if (mach_state == NULL)
		return;

	if (mach_state->ms_pstate.cma_ops != NULL) {
		mach_state->ms_pstate.cma_ops->cpus_fini(cp);
		mach_state->ms_pstate.cma_ops = NULL;
	}

	if (mach_state->ms_tstate.cma_ops != NULL) {
		mach_state->ms_tstate.cma_ops->cpus_fini(cp);
		mach_state->ms_tstate.cma_ops = NULL;
	}

	if (mach_state->ms_cstate.cma_ops != NULL) {
		mach_state->ms_cstate.cma_ops->cpus_fini(cp);
		mach_state->ms_cstate.cma_ops = NULL;
	}

	cpupm_free_notify_handlers(cp);

	if (mach_state->ms_acpi_handle != NULL) {
		cpu_acpi_fini(mach_state->ms_acpi_handle);
		mach_state->ms_acpi_handle = NULL;
	}

	mutex_destroy(&mach_state->ms_lock);
	kmem_free(mach_state, sizeof (cpupm_mach_state_t));
	cp->cpu_m.mcpu_pm_mach_state = NULL;
#endif
}

/*
 * If all CPUs have started and at least one power state is manageable,
 * then the CPUs are ready for power management.
 */
boolean_t
cpupm_is_ready()
{
#ifndef __xpv
	if (cpupm_enabled == CPUPM_NO_STATES)
		return (B_FALSE);
	return (cpupm_ready);
#else
	return (B_FALSE);
#endif
}

boolean_t
cpupm_is_enabled(uint32_t state)
{
	return ((cpupm_enabled & state) == state);
}

/*
 * By default, all states are enabled. Disabling a state also frees the
 * power domains that were allocated for it.
 */
void
cpupm_disable(uint32_t state)
{
	if (state & CPUPM_P_STATES) {
		cpupm_free_domains(&cpupm_pstate_domains);
	}
	if (state & CPUPM_T_STATES) {
		cpupm_free_domains(&cpupm_tstate_domains);
	}
	if (state & CPUPM_C_STATES) {
		cpupm_free_domains(&cpupm_cstate_domains);
	}
	cpupm_enabled &= ~state;
}
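
/*
 * Example (editorial note, not in the original source): the state
 * argument is a bitmask, so cpupm_disable(CPUPM_C_STATES) clears only
 * the C-state bit; a subsequent cpupm_is_enabled(CPUPM_P_STATES) still
 * returns B_TRUE as long as P-states remain manageable.
 */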

/*
 * Once all CPUs have been started, the PPM driver should build CPU
 * domains and initialize the topspeed for all CPU devices.
 */
void
cpupm_post_startup()
{
#ifndef __xpv
	/*
	 * The CPU domains built by the PPM driver while the CPUs were
	 * attaching should be rebuilt with the information retrieved
	 * from ACPI.
	 */
	if (cpupm_rebuild_cpu_domains != NULL)
		(*cpupm_rebuild_cpu_domains)();

	/*
	 * Only initialize the topspeed if P-states are enabled.
	 */
	if ((cpupm_enabled & CPUPM_P_STATES) && cpupm_init_topspeed != NULL)
		(*cpupm_init_topspeed)();
#endif
	cpupm_ready = B_TRUE;
}

/*
 * Allocate power domains for C-, P- and T-states.
 */
void
cpupm_alloc_domains(cpu_t *cp, int state)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpupm_state_domains_t **dom_ptr;
	cpupm_state_domains_t *dptr;
	cpupm_state_domains_t **mach_dom_state_ptr;
	uint32_t domain;
	uint32_t type;

	switch (state) {
	case CPUPM_P_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) {
			domain = CPU_ACPI_PSD(handle).sd_domain;
			type = CPU_ACPI_PSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_chipid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_pstate_domains;
		mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain;
		break;
	case CPUPM_T_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) {
			domain = CPU_ACPI_TSD(handle).sd_domain;
			type = CPU_ACPI_TSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_chipid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_tstate_domains;
		mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain;
		break;
	case CPUPM_C_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) {
			domain = CPU_ACPI_CSD(handle).sd_domain;
			type = CPU_ACPI_CSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_coreid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_cstate_domains;
		mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain;
		break;
	default:
		return;
	}

	for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
		if (dptr->pm_domain == domain)
			break;
	}

	/* new domain is created and linked at the head */
	if (dptr == NULL) {
		dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP);
		dptr->pm_domain = domain;
		dptr->pm_type = type;
		dptr->pm_next = *dom_ptr;
		mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN,
		    (void *)ipltospl(DISP_LEVEL));
		CPUSET_ZERO(dptr->pm_cpus);
		*dom_ptr = dptr;
	}
	CPUSET_ADD(dptr->pm_cpus, cp->cpu_id);
	*mach_dom_state_ptr = dptr;
}

/*
 * Free C-, P- or T-state power domains.
 */
void
cpupm_free_domains(cpupm_state_domains_t **dom_ptr)
{
	cpupm_state_domains_t *this_domain, *next_domain;

	this_domain = *dom_ptr;
	while (this_domain != NULL) {
		next_domain = this_domain->pm_next;
		mutex_destroy(&this_domain->pm_lock);
		kmem_free((void *)this_domain,
		    sizeof (cpupm_state_domains_t));
		this_domain = next_domain;
	}
	*dom_ptr = NULL;
}
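
/*
 * Worked example (editorial sketch, illustrative topology): on a
 * two-socket system whose _PSD objects report domains 0 and 1 with
 * CPU_ACPI_HW_ALL coordination, calling cpupm_alloc_domains(cp,
 * CPUPM_P_STATES) for every CPU leaves cpupm_pstate_domains as a
 * two-node list, each node's pm_cpus holding the CPU IDs of one
 * socket. When no _PSD is cached, the chip ID stands in for the P/T
 * domain (per-socket grouping) and the core ID for the C domain
 * (per-core grouping).
 */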

void
cpupm_alloc_ms_cstate(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state;
	cpupm_mach_acpi_state_t *ms_cstate;

	mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	ms_cstate = &mach_state->ms_cstate;
	ASSERT(ms_cstate->cma_state.cstate == NULL);
	ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t),
	    KM_SLEEP);
	ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1;
}

void
cpupm_free_ms_cstate(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate;

	if (ms_cstate->cma_state.cstate != NULL) {
		kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t));
		ms_cstate->cma_state.cstate = NULL;
	}
}

void
cpupm_state_change(cpu_t *cp, int level, int state)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_state_ops_t *state_ops;
	cpupm_state_domains_t *state_domain;
	cpuset_t set;

	DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level);

	if (mach_state == NULL) {
		return;
	}

	switch (state) {
	case CPUPM_P_STATES:
		state_ops = mach_state->ms_pstate.cma_ops;
		state_domain = mach_state->ms_pstate.cma_domain;
		break;
	case CPUPM_T_STATES:
		state_ops = mach_state->ms_tstate.cma_ops;
		state_domain = mach_state->ms_tstate.cma_domain;
		break;
	default:
		/*
		 * Only P- and T-state changes are coordinated here; bail
		 * out rather than dereference the uninitialized
		 * state_ops/state_domain pointers.
		 */
		return;
	}

	switch (state_domain->pm_type) {
	case CPU_ACPI_SW_ANY:
		/*
		 * A request on any CPU in the domain transitions the domain
		 */
		CPUSET_ONLY(set, cp->cpu_id);
		state_ops->cpus_change(set, level);
		break;
	case CPU_ACPI_SW_ALL:
		/*
		 * All CPUs in the domain must request the transition
		 */
	case CPU_ACPI_HW_ALL:
		/*
		 * P/T-state transitions are coordinated by the hardware.
		 * For now, request the transition on all CPUs in the domain,
		 * but looking ahead we can probably be smarter about this.
		 */
		mutex_enter(&state_domain->pm_lock);
		state_ops->cpus_change(state_domain->pm_cpus, level);
		mutex_exit(&state_domain->pm_lock);
		break;
	default:
		cmn_err(CE_WARN, "Unknown domain coordination type: %d",
		    state_domain->pm_type);
	}
}

/*
 * CPU PM interfaces exposed to the CPU power manager
 */
/*ARGSUSED*/
id_t
cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);

	if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) &&
	    !cpupm_is_enabled(CPUPM_C_STATES))) {
		return (CPUPM_NO_DOMAIN);
	}
	if (type == CPUPM_DTYPE_ACTIVE) {
		/*
		 * Return P-state domain for the specified CPU
		 */
		if (mach_state->ms_pstate.cma_domain) {
			return (mach_state->ms_pstate.cma_domain->pm_domain);
		}
	} else if (type == CPUPM_DTYPE_IDLE) {
		/*
		 * Return C-state domain for the specified CPU
		 */
		if (mach_state->ms_cstate.cma_domain) {
			return (mach_state->ms_cstate.cma_domain->pm_domain);
		}
	}
	return (CPUPM_NO_DOMAIN);
}

/*ARGSUSED*/
uint_t
cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type,
    cpupm_state_t *states)
{
	int	*speeds;
	uint_t	nspeeds, i;

	/*
	 * Idle domain support unimplemented
	 */
	if (type != CPUPM_DTYPE_ACTIVE) {
		return (0);
	}
	nspeeds = cpupm_get_speeds(cp, &speeds);

	/*
	 * If the caller passes NULL for states, just return the
	 * number of states.
	 */
	if (states != NULL) {
		for (i = 0; i < nspeeds; i++) {
			states[i].cps_speed = speeds[i];
			states[i].cps_handle = (cpupm_handle_t)i;
		}
	}
	cpupm_free_speeds(speeds, nspeeds);
	return (nspeeds);
}
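
/*
 * Usage sketch (illustrative only, not from the original source):
 * callers typically size the array with a first call and then fill it.
 *
 *	uint_t n = cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, NULL);
 *	cpupm_state_t *sp = kmem_zalloc(n * sizeof (cpupm_state_t), KM_SLEEP);
 *	(void) cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, sp);
 */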

/*ARGSUSED*/
int
cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state)
{
	if (!cpupm_is_ready())
		return (-1);

	cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES);

	return (0);
}

/*ARGSUSED*/
/*
 * Note: It is the responsibility of the users of
 * cpupm_get_speeds() to free the memory allocated
 * for speeds using cpupm_free_speeds().
 */
uint_t
cpupm_get_speeds(cpu_t *cp, int **speeds)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds));
#else
	return (0);
#endif
}

/*ARGSUSED*/
void
cpupm_free_speeds(int *speeds, uint_t nspeeds)
{
#ifndef __xpv
	cpu_acpi_free_speeds(speeds, nspeeds);
#endif
}

/*
 * All CPU instances have been initialized successfully and P-states
 * are manageable.
 */
boolean_t
cpupm_power_ready(void)
{
	return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready());
}

/*
 * All CPU instances have been initialized successfully and T-states
 * are manageable.
 */
boolean_t
cpupm_throttle_ready(void)
{
	return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready());
}

/*
 * All CPU instances have been initialized successfully and C-states
 * are manageable.
 */
boolean_t
cpupm_cstate_ready(void)
{
	return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready());
}

void
cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_notification_t *entry;

	mutex_enter(&mach_state->ms_lock);
	for (entry = mach_state->ms_handlers; entry != NULL;
	    entry = entry->nq_next) {
		entry->nq_handler(obj, val, entry->nq_ctx);
	}
	mutex_exit(&mach_state->ms_lock);
}

/*ARGSUSED*/
void
cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;

	entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
	entry->nq_handler = handler;
	entry->nq_ctx = ctx;
	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		entry->nq_next = NULL;
		mach_state->ms_handlers = entry;
		cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler, cp);
	} else {
		entry->nq_next = mach_state->ms_handlers;
		mach_state->ms_handlers = entry;
	}
	mutex_exit(&mach_state->ms_lock);
#endif
}
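
/*
 * Illustrative example (hypothetical consumer, not part of this file):
 * the first handler added for a CPU also installs the underlying ACPI
 * notify handler; later additions just grow the chain.
 *
 *	cpupm_add_notify_handler(cp, my_notify_handler, my_ctx);
 *
 * my_notify_handler() is then invoked under ms_lock each time
 * cpupm_notify_handler() fans out an ACPI notification for that CPU.
 */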

/*ARGSUSED*/
static void
cpupm_free_notify_handlers(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;
	cpupm_notification_t *next;

	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		mutex_exit(&mach_state->ms_lock);
		return;
	}
	if (mach_state->ms_acpi_handle != NULL) {
		cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler);
	}
	entry = mach_state->ms_handlers;
	while (entry != NULL) {
		next = entry->nq_next;
		kmem_free(entry, sizeof (cpupm_notification_t));
		entry = next;
	}
	mach_state->ms_handlers = NULL;
	mutex_exit(&mach_state->ms_lock);
#endif
}

/*
 * Get the current max speed from the ACPI _PPC object
 */
/*ARGSUSED*/
int
cpupm_get_top_speed(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t	*mach_state;
	cpu_acpi_handle_t	handle;
	int			plat_level;
	uint_t			nspeeds;
	int			max_level;

	mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;

	cpu_acpi_cache_ppc(handle);
	plat_level = CPU_ACPI_PPC(handle);

	nspeeds = CPU_ACPI_PSTATES_COUNT(handle);

	max_level = nspeeds - 1;
	if ((plat_level < 0) || (plat_level > max_level)) {
		cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
		    "_PPC out of range %d", cp->cpu_id, plat_level);
		plat_level = 0;
	}

	return (plat_level);
#else
	return (0);
#endif
}
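
/*
 * Worked example (editorial note, illustrative values): with four _PSS
 * entries, max_level is 3. A platform-reported _PPC of 2 leaves only
 * the _PSS entries at indices 2 and 3 (the slower P-states) usable,
 * while an out-of-range value such as 7 is logged and reset to 0, the
 * full-speed level.
 */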

/*
 * This notification handler is called whenever the ACPI _PPC
 * object changes. The _PPC is a sort of governor on power levels.
 * It sets an upper threshold on which _PSS-defined power levels
 * are usable. The _PPC value is dynamic and may change as properties
 * of the system (e.g., thermal condition or AC power source) change.
 */
static void
cpupm_power_manage_notifications(void *ctx)
{
	cpu_t	*cp = ctx;
	int	top_speed;

	top_speed = cpupm_get_top_speed(cp);
	cpupm_redefine_max_activepwr_state(cp, top_speed);
}

/* ARGSUSED */
static void
cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
#ifndef __xpv
	/*
	 * Currently, we handle _TPC, _CST and _PPC change notifications.
	 */
	if (val == CPUPM_TPC_CHANGE_NOTIFICATION) {
		cpupm_throttle_manage_notification(ctx);
	} else if (val == CPUPM_CST_CHANGE_NOTIFICATION) {
		cpuidle_manage_cstates(ctx);
	} else if (val == CPUPM_PPC_CHANGE_NOTIFICATION) {
		cpupm_power_manage_notifications(ctx);
	}
#endif
}

/*
 * Update cpupm cstate data each time CPU exits idle.
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	cs_data->cs_idle_exit = end;
}

/*
 * Determine the next C-state based on cpupm data.
 * Update cpupm cstate data each time the CPU goes idle.
 * Do as much as possible in this idle-entry bookkeeping function, because
 * the performance impact while idle is minimal compared to the wakeup
 * function, when there is real work to do.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;

	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	++cs_data->cs_cnt;
	if (cs_data->cs_cnt > cpupm_cs_sample_tunable) {
		cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
		scalehrtime(&cs_data->cs_smpl_len);
		cs_data->cs_smpl_len |= 1;	/* protect from DIV 0 */
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy
		 */
		cs_data->cs_next_cstate = CPU_ACPI_C3;

		/*
		 * Will CPU be idle long enough to save power?
		 */
		ave_idle_time = (cs_data->cs_smpl_idle /
		    cpupm_cs_sample_tunable) / 1000;
		if (ave_idle_time < (cs_data->cs_C2_latency *
		    cpupm_cs_idle_save_tunable)) {
			cs_data->cs_next_cstate = CPU_ACPI_C1;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 1);
			return (cs_data->cs_next_cstate);
		} else if (ave_idle_time < (cs_data->cs_C3_latency *
		    cpupm_cs_idle_save_tunable)) {
			cs_data->cs_next_cstate = CPU_ACPI_C2;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 2);
		}

		/*
		 * Wakeup often (even when non-idle time is very short)?
		 * Some producer/consumer type loads fall into this category.
		 */
		ave_interval = (cs_data->cs_smpl_len / cpupm_cs_sample_tunable)
		    / 1000;
		if (ave_interval <=
		    (cs_data->cs_C2_latency * cpupm_cs_idle_cost_tunable)) {
			cs_data->cs_next_cstate = CPU_ACPI_C1;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 3);
			return (cs_data->cs_next_cstate);
		} else if (ave_interval <=
		    (cs_data->cs_C3_latency * cpupm_cs_idle_cost_tunable)) {
			cs_data->cs_next_cstate = CPU_ACPI_C2;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 4);
		}

		/*
		 * Idle percent
		 */
		if (cs_data->cs_smpl_idle_pct < cpupm_C2_idle_pct_tunable) {
			cs_data->cs_next_cstate = CPU_ACPI_C1;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 5);
			return (cs_data->cs_next_cstate);
		} else if ((cs_data->cs_next_cstate > CPU_ACPI_C2) &&
		    (cs_data->cs_smpl_idle_pct < cpupm_C3_idle_pct_tunable)) {
			cs_data->cs_next_cstate = CPU_ACPI_C2;
			DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, CPU,
			    int, 6);
		}
	}

	return (cs_data->cs_next_cstate);
}
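
/*
 * Worked example of the policy above (editorial sketch, illustrative
 * numbers): assume the default tunables, cs_C2_latency = 10 us,
 * cs_C3_latency = 100 us, and a 5-sample period spanning 10 ms in which
 * the CPU was idle for 8 ms (cs_smpl_idle_pct = 80). Then
 * ave_idle_time = 8000 / 5 = 1600 us, at least twice cs_C3_latency;
 * ave_interval = 10000 / 5 = 2000 us, which exceeds ten times
 * cs_C3_latency; and the idle percentage meets cpupm_C3_idle_pct_tunable,
 * so cs_next_cstate remains CPU_ACPI_C3.
 */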