1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cpu_pm.h> 27 #include <sys/x86_archext.h> 28 #include <sys/sdt.h> 29 #include <sys/spl.h> 30 #include <sys/machsystm.h> 31 #include <sys/hpet.h> 32 #include <sys/cpupm.h> 33 #include <sys/cpu_idle.h> 34 #include <sys/cpu_acpi.h> 35 #include <sys/cpupm_throttle.h> 36 #include <sys/dtrace.h> 37 38 /* 39 * This callback is used to build the PPM CPU domains once 40 * all the CPU devices have been started. The callback is 41 * initialized by the PPM driver to point to a routine that 42 * will build the domains. 43 */ 44 void (*cpupm_rebuild_cpu_domains)(void); 45 46 /* 47 * This callback is used to reset the topspeed for all the 48 * CPU devices. The callback is initialized by the PPM driver to 49 * point to a routine that will reinitialize all the CPU devices 50 * once all the CPU devices have been started and the CPU domains 51 * built. 52 */ 53 void (*cpupm_init_topspeed)(void); 54 55 /* 56 * This callback is used to redefine the topspeed for a CPU device. 
57 * Since all CPUs in a domain should have identical properties, this 58 * callback is initialized by the PPM driver to point to a routine 59 * that will redefine the topspeed for all devices in a CPU domain. 60 * This callback is exercised whenever an ACPI _PPC change notification 61 * is received by the CPU driver. 62 */ 63 void (*cpupm_redefine_topspeed)(void *); 64 65 /* 66 * This callback is used by the PPM driver to call into the CPU driver 67 * to set a new topspeed for a CPU. 68 */ 69 void (*cpupm_set_topspeed_callb)(void *, int); 70 71 /* 72 * This callback is used by the PPM driver to call into the CPU driver 73 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value). 74 */ 75 int (*cpupm_get_topspeed_callb)(void *); 76 77 static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *); 78 static void cpupm_free_notify_handlers(cpu_t *); 79 80 /* 81 * Until proven otherwise, all power states are manageable. 82 */ 83 static uint32_t cpupm_enabled = CPUPM_ALL_STATES; 84 85 /* 86 * Until all CPUs have started, we do not allow 87 * power management. 88 */ 89 static boolean_t cpupm_ready = B_FALSE; 90 91 cpupm_state_domains_t *cpupm_pstate_domains = NULL; 92 cpupm_state_domains_t *cpupm_tstate_domains = NULL; 93 cpupm_state_domains_t *cpupm_cstate_domains = NULL; 94 95 /* 96 * c-state tunables 97 * 98 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle 99 * divided by time spent in the idle state transitions. 100 * A value of 10 means the CPU will not spend more than 1/10 of its time 101 * in idle latency. The worst case performance will be 90% of non Deep C-state 102 * kernel. 103 * 104 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state 105 * before it is worth going there. Expressed as a multiple of latency. 
106 */ 107 uint32_t cpupm_cs_sample_tunable = 5; /* samples in decision period */ 108 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 109 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 110 uint16_t cpupm_C2_idle_pct_tunable = 70; 111 uint16_t cpupm_C3_idle_pct_tunable = 80; 112 113 #ifndef __xpv 114 extern boolean_t cpupm_intel_init(cpu_t *); 115 extern boolean_t cpupm_amd_init(cpu_t *); 116 117 typedef struct cpupm_vendor { 118 boolean_t (*cpuv_init)(cpu_t *); 119 } cpupm_vendor_t; 120 121 /* 122 * Table of supported vendors. 123 */ 124 static cpupm_vendor_t cpupm_vendors[] = { 125 cpupm_intel_init, 126 cpupm_amd_init, 127 NULL 128 }; 129 #endif 130 131 /* 132 * Initialize the machine. 133 * See if a module exists for managing power for this CPU. 134 */ 135 /*ARGSUSED*/ 136 void 137 cpupm_init(cpu_t *cp) 138 { 139 #ifndef __xpv 140 cpupm_vendor_t *vendors; 141 cpupm_mach_state_t *mach_state; 142 struct machcpu *mcpu = &(cp->cpu_m); 143 int *speeds; 144 uint_t nspeeds; 145 int ret; 146 147 mach_state = cp->cpu_m.mcpu_pm_mach_state = 148 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 149 mach_state->ms_caps = CPUPM_NO_STATES; 150 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 151 152 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 153 if (mach_state->ms_acpi_handle == NULL) { 154 cpupm_free(cp); 155 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 156 "unable to get ACPI handle", cp->cpu_id); 157 cmn_err(CE_NOTE, "!CPU power management will not function."); 158 CPUPM_DISABLE(); 159 return; 160 } 161 162 /* 163 * Loop through the CPU management module table and see if 164 * any of the modules implement CPU power management 165 * for this CPU. 166 */ 167 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 168 if (vendors->cpuv_init(cp)) 169 break; 170 } 171 172 /* 173 * Nope, we can't power manage this CPU. 
174 */ 175 if (vendors == NULL) { 176 cpupm_free(cp); 177 CPUPM_DISABLE(); 178 return; 179 } 180 181 /* 182 * If P-state support exists for this system, then initialize it. 183 */ 184 if (mach_state->ms_pstate.cma_ops != NULL) { 185 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 186 if (ret != 0) { 187 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 188 " unable to initialize P-state support", 189 cp->cpu_id); 190 mach_state->ms_pstate.cma_ops = NULL; 191 cpupm_disable(CPUPM_P_STATES); 192 } else { 193 nspeeds = cpupm_get_speeds(cp, &speeds); 194 if (nspeeds == 0) { 195 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 196 " no speeds to manage", cp->cpu_id); 197 } else { 198 cpupm_set_supp_freqs(cp, speeds, nspeeds); 199 cpupm_free_speeds(speeds, nspeeds); 200 mach_state->ms_caps |= CPUPM_P_STATES; 201 } 202 } 203 } 204 205 if (mach_state->ms_tstate.cma_ops != NULL) { 206 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 207 if (ret != 0) { 208 char err_msg[128]; 209 int p_res; 210 p_res = snprintf(err_msg, sizeof (err_msg), 211 "cpupm_init: processor %d: unable to initialize " 212 "T-state support", cp->cpu_id); 213 if (p_res >= 0) 214 DTRACE_PROBE1(cpu_ts_err_msg, char *, err_msg); 215 mach_state->ms_tstate.cma_ops = NULL; 216 cpupm_disable(CPUPM_T_STATES); 217 } else { 218 mach_state->ms_caps |= CPUPM_T_STATES; 219 } 220 } 221 222 /* 223 * If C-states support exists for this system, then initialize it. 
224 */ 225 if (mach_state->ms_cstate.cma_ops != NULL) { 226 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 227 if (ret != 0) { 228 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 229 " unable to initialize C-state support", 230 cp->cpu_id); 231 mach_state->ms_cstate.cma_ops = NULL; 232 mcpu->max_cstates = CPU_ACPI_C1; 233 cpupm_disable(CPUPM_C_STATES); 234 idle_cpu = non_deep_idle_cpu; 235 disp_enq_thread = non_deep_idle_disp_enq_thread; 236 } else if (cpu_deep_cstates_supported()) { 237 mcpu->max_cstates = cpu_acpi_get_max_cstates( 238 mach_state->ms_acpi_handle); 239 if (mcpu->max_cstates > CPU_ACPI_C1) { 240 hpet.callback(CST_EVENT_MULTIPLE_CSTATES); 241 CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 242 mcpu->mcpu_idle_type = CPU_ACPI_C1; 243 disp_enq_thread = cstate_wakeup; 244 } else { 245 hpet.callback(CST_EVENT_ONE_CSTATE); 246 } 247 mach_state->ms_caps |= CPUPM_C_STATES; 248 } else { 249 mcpu->max_cstates = CPU_ACPI_C1; 250 idle_cpu = non_deep_idle_cpu; 251 disp_enq_thread = non_deep_idle_disp_enq_thread; 252 } 253 } 254 255 256 if (mach_state->ms_caps == CPUPM_NO_STATES) { 257 cpupm_free(cp); 258 CPUPM_DISABLE(); 259 return; 260 } 261 262 if ((mach_state->ms_caps & CPUPM_T_STATES) || 263 (mach_state->ms_caps & CPUPM_P_STATES) || 264 (mach_state->ms_caps & CPUPM_C_STATES)) 265 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 266 #endif 267 } 268 269 /* 270 * Free any resources allocated by cpupm_init(). 
271 */ 272 /*ARGSUSED*/ 273 void 274 cpupm_free(cpu_t *cp) 275 { 276 #ifndef __xpv 277 cpupm_mach_state_t *mach_state = 278 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 279 280 if (mach_state == NULL) 281 return; 282 if (mach_state->ms_pstate.cma_ops != NULL) { 283 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 284 mach_state->ms_pstate.cma_ops = NULL; 285 } 286 287 if (mach_state->ms_tstate.cma_ops != NULL) { 288 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 289 mach_state->ms_tstate.cma_ops = NULL; 290 } 291 292 if (mach_state->ms_cstate.cma_ops != NULL) { 293 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 294 mach_state->ms_cstate.cma_ops = NULL; 295 } 296 297 cpupm_free_notify_handlers(cp); 298 299 if (mach_state->ms_acpi_handle != NULL) { 300 cpu_acpi_fini(mach_state->ms_acpi_handle); 301 mach_state->ms_acpi_handle = NULL; 302 } 303 304 mutex_destroy(&mach_state->ms_lock); 305 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 306 cp->cpu_m.mcpu_pm_mach_state = NULL; 307 #endif 308 } 309 310 /* 311 * If all CPUs have started and at least one power state is manageable, 312 * then the CPUs are ready for power management. 313 */ 314 boolean_t 315 cpupm_is_ready() 316 { 317 #ifndef __xpv 318 if (cpupm_enabled == CPUPM_NO_STATES) 319 return (B_FALSE); 320 return (cpupm_ready); 321 #else 322 return (B_FALSE); 323 #endif 324 325 } 326 327 boolean_t 328 cpupm_is_enabled(uint32_t state) 329 { 330 return ((cpupm_enabled & state) == state); 331 } 332 333 /* 334 * By default, all states are enabled. 
335 */ 336 void 337 cpupm_disable(uint32_t state) 338 { 339 340 if (state & CPUPM_P_STATES) { 341 cpupm_free_domains(&cpupm_pstate_domains); 342 } 343 if (state & CPUPM_T_STATES) { 344 cpupm_free_domains(&cpupm_tstate_domains); 345 } 346 if (state & CPUPM_C_STATES) { 347 cpupm_free_domains(&cpupm_cstate_domains); 348 } 349 cpupm_enabled &= ~state; 350 } 351 352 /* 353 * Once all CPUs have been started, the PPM driver should build CPU 354 * domains and initialize the topspeed for all CPU devices. 355 */ 356 void 357 cpupm_post_startup() 358 { 359 #ifndef __xpv 360 /* 361 * The CPU domain built by the PPM during CPUs attaching 362 * should be rebuilt with the information retrieved from 363 * ACPI. 364 */ 365 if (cpupm_rebuild_cpu_domains != NULL) 366 (*cpupm_rebuild_cpu_domains)(); 367 368 /* 369 * Only initialize the topspeed if P-states are enabled. 370 */ 371 if (cpupm_enabled & CPUPM_P_STATES && cpupm_init_topspeed != NULL) 372 (*cpupm_init_topspeed)(); 373 #endif 374 cpupm_ready = B_TRUE; 375 } 376 377 /* 378 * Allocate power domains for C,P and T States 379 */ 380 void 381 cpupm_alloc_domains(cpu_t *cp, int state) 382 { 383 cpupm_mach_state_t *mach_state = 384 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 385 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 386 cpupm_state_domains_t **dom_ptr; 387 cpupm_state_domains_t *dptr; 388 cpupm_state_domains_t **mach_dom_state_ptr; 389 uint32_t domain; 390 uint32_t type; 391 392 switch (state) { 393 case CPUPM_P_STATES: 394 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 395 domain = CPU_ACPI_PSD(handle).sd_domain; 396 type = CPU_ACPI_PSD(handle).sd_type; 397 } else { 398 mutex_enter(&cpu_lock); 399 domain = cpuid_get_chipid(cp); 400 mutex_exit(&cpu_lock); 401 type = CPU_ACPI_HW_ALL; 402 } 403 dom_ptr = &cpupm_pstate_domains; 404 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 405 break; 406 case CPUPM_T_STATES: 407 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 408 domain = 
CPU_ACPI_TSD(handle).sd_domain; 409 type = CPU_ACPI_TSD(handle).sd_type; 410 } else { 411 mutex_enter(&cpu_lock); 412 domain = cpuid_get_chipid(cp); 413 mutex_exit(&cpu_lock); 414 type = CPU_ACPI_HW_ALL; 415 } 416 dom_ptr = &cpupm_tstate_domains; 417 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 418 break; 419 case CPUPM_C_STATES: 420 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 421 domain = CPU_ACPI_CSD(handle).sd_domain; 422 type = CPU_ACPI_CSD(handle).sd_type; 423 } else { 424 mutex_enter(&cpu_lock); 425 domain = cpuid_get_coreid(cp); 426 mutex_exit(&cpu_lock); 427 type = CPU_ACPI_HW_ALL; 428 } 429 dom_ptr = &cpupm_cstate_domains; 430 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 431 break; 432 default: 433 return; 434 } 435 436 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 437 if (dptr->pm_domain == domain) 438 break; 439 } 440 441 /* new domain is created and linked at the head */ 442 if (dptr == NULL) { 443 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 444 dptr->pm_domain = domain; 445 dptr->pm_type = type; 446 dptr->pm_next = *dom_ptr; 447 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 448 (void *)ipltospl(DISP_LEVEL)); 449 CPUSET_ZERO(dptr->pm_cpus); 450 *dom_ptr = dptr; 451 } 452 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 453 *mach_dom_state_ptr = dptr; 454 } 455 456 /* 457 * Free C, P or T state power domains 458 */ 459 void 460 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 461 { 462 cpupm_state_domains_t *this_domain, *next_domain; 463 464 this_domain = *dom_ptr; 465 while (this_domain != NULL) { 466 next_domain = this_domain->pm_next; 467 mutex_destroy(&this_domain->pm_lock); 468 kmem_free((void *)this_domain, 469 sizeof (cpupm_state_domains_t)); 470 this_domain = next_domain; 471 } 472 *dom_ptr = NULL; 473 } 474 475 void 476 cpupm_alloc_ms_cstate(cpu_t *cp) 477 { 478 cpupm_mach_state_t *mach_state; 479 cpupm_mach_acpi_state_t *ms_cstate; 480 481 mach_state = (cpupm_mach_state_t 
*)(cp->cpu_m.mcpu_pm_mach_state); 482 ms_cstate = &mach_state->ms_cstate; 483 ASSERT(ms_cstate->cma_state.cstate == NULL); 484 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 485 KM_SLEEP); 486 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 487 } 488 489 void 490 cpupm_free_ms_cstate(cpu_t *cp) 491 { 492 cpupm_mach_state_t *mach_state = 493 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 494 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 495 496 if (ms_cstate->cma_state.cstate != NULL) { 497 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 498 ms_cstate->cma_state.cstate = NULL; 499 } 500 } 501 502 void 503 cpupm_state_change(cpu_t *cp, int level, int state) 504 { 505 cpupm_mach_state_t *mach_state = 506 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 507 cpupm_state_ops_t *state_ops; 508 cpupm_state_domains_t *state_domain; 509 cpuset_t set; 510 511 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 512 513 if (mach_state == NULL) { 514 return; 515 } 516 517 switch (state) { 518 case CPUPM_P_STATES: 519 state_ops = mach_state->ms_pstate.cma_ops; 520 state_domain = mach_state->ms_pstate.cma_domain; 521 break; 522 case CPUPM_T_STATES: 523 state_ops = mach_state->ms_tstate.cma_ops; 524 state_domain = mach_state->ms_tstate.cma_domain; 525 break; 526 default: 527 break; 528 } 529 530 switch (state_domain->pm_type) { 531 case CPU_ACPI_SW_ANY: 532 /* 533 * A request on any CPU in the domain transitions the domain 534 */ 535 CPUSET_ONLY(set, cp->cpu_id); 536 state_ops->cpus_change(set, level); 537 break; 538 case CPU_ACPI_SW_ALL: 539 /* 540 * All CPUs in the domain must request the transition 541 */ 542 case CPU_ACPI_HW_ALL: 543 /* 544 * P/T-state transitions are coordinated by the hardware 545 * For now, request the transition on all CPUs in the domain, 546 * but looking ahead we can probably be smarter about this. 
547 */ 548 mutex_enter(&state_domain->pm_lock); 549 state_ops->cpus_change(state_domain->pm_cpus, level); 550 mutex_exit(&state_domain->pm_lock); 551 break; 552 default: 553 cmn_err(CE_WARN, "Unknown domain coordination type: %d", 554 state_domain->pm_type); 555 } 556 } 557 558 /* 559 * CPU PM interfaces exposed to the CPU power manager 560 */ 561 /*ARGSUSED*/ 562 id_t 563 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 564 { 565 cpupm_mach_state_t *mach_state = 566 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 567 568 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 569 !cpupm_is_enabled(CPUPM_C_STATES))) { 570 return (CPUPM_NO_DOMAIN); 571 } 572 if (type == CPUPM_DTYPE_ACTIVE) { 573 /* 574 * Return P-State domain for the specified CPU 575 */ 576 if (mach_state->ms_pstate.cma_domain) { 577 return (mach_state->ms_pstate.cma_domain->pm_domain); 578 } 579 } else if (type == CPUPM_DTYPE_IDLE) { 580 /* 581 * Return C-State domain for the specified CPU 582 */ 583 if (mach_state->ms_cstate.cma_domain) { 584 return (mach_state->ms_cstate.cma_domain->pm_domain); 585 } 586 } 587 return (CPUPM_NO_DOMAIN); 588 } 589 590 /*ARGSUSED*/ 591 uint_t 592 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 593 cpupm_state_t *states) 594 { 595 int *speeds; 596 uint_t nspeeds, i; 597 598 /* 599 * Idle domain support unimplemented 600 */ 601 if (type != CPUPM_DTYPE_ACTIVE) { 602 return (0); 603 } 604 nspeeds = cpupm_get_speeds(cp, &speeds); 605 606 /* 607 * If the caller passes NULL for states, just return the 608 * number of states. 
609 */ 610 if (states != NULL) { 611 for (i = 0; i < nspeeds; i++) { 612 states[i].cps_speed = speeds[i]; 613 states[i].cps_handle = (cpupm_handle_t)i; 614 } 615 } 616 cpupm_free_speeds(speeds, nspeeds); 617 return (nspeeds); 618 } 619 620 /*ARGSUSED*/ 621 int 622 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 623 { 624 if (!cpupm_is_ready()) 625 return (-1); 626 627 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 628 629 return (0); 630 } 631 632 /*ARGSUSED*/ 633 /* 634 * Note: It is the responsibility of the users of 635 * cpupm_get_speeds() to free the memory allocated 636 * for speeds using cpupm_free_speeds() 637 */ 638 uint_t 639 cpupm_get_speeds(cpu_t *cp, int **speeds) 640 { 641 #ifndef __xpv 642 cpupm_mach_state_t *mach_state = 643 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 644 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 645 #else 646 return (0); 647 #endif 648 } 649 650 /*ARGSUSED*/ 651 void 652 cpupm_free_speeds(int *speeds, uint_t nspeeds) 653 { 654 #ifndef __xpv 655 cpu_acpi_free_speeds(speeds, nspeeds); 656 #endif 657 } 658 659 /* 660 * All CPU instances have been initialized successfully. 661 */ 662 boolean_t 663 cpupm_power_ready(void) 664 { 665 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready()); 666 } 667 668 /* 669 * All CPU instances have been initialized successfully. 670 */ 671 boolean_t 672 cpupm_throttle_ready(void) 673 { 674 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready()); 675 } 676 677 /* 678 * All CPU instances have been initialized successfully. 
679 */ 680 boolean_t 681 cpupm_cstate_ready(void) 682 { 683 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready()); 684 } 685 686 void 687 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 688 { 689 cpu_t *cp = ctx; 690 cpupm_mach_state_t *mach_state = 691 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 692 cpupm_notification_t *entry; 693 694 mutex_enter(&mach_state->ms_lock); 695 for (entry = mach_state->ms_handlers; entry != NULL; 696 entry = entry->nq_next) { 697 entry->nq_handler(obj, val, entry->nq_ctx); 698 } 699 mutex_exit(&mach_state->ms_lock); 700 } 701 702 /*ARGSUSED*/ 703 void 704 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 705 { 706 #ifndef __xpv 707 cpupm_mach_state_t *mach_state = 708 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 709 cpupm_notification_t *entry; 710 711 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 712 entry->nq_handler = handler; 713 entry->nq_ctx = ctx; 714 mutex_enter(&mach_state->ms_lock); 715 if (mach_state->ms_handlers == NULL) { 716 entry->nq_next = NULL; 717 mach_state->ms_handlers = entry; 718 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 719 cpupm_notify_handler, cp); 720 721 } else { 722 entry->nq_next = mach_state->ms_handlers; 723 mach_state->ms_handlers = entry; 724 } 725 mutex_exit(&mach_state->ms_lock); 726 #endif 727 } 728 729 /*ARGSUSED*/ 730 static void 731 cpupm_free_notify_handlers(cpu_t *cp) 732 { 733 #ifndef __xpv 734 cpupm_mach_state_t *mach_state = 735 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 736 cpupm_notification_t *entry; 737 cpupm_notification_t *next; 738 739 mutex_enter(&mach_state->ms_lock); 740 if (mach_state->ms_handlers == NULL) { 741 mutex_exit(&mach_state->ms_lock); 742 return; 743 } 744 if (mach_state->ms_acpi_handle != NULL) { 745 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 746 cpupm_notify_handler); 747 } 748 entry = mach_state->ms_handlers; 749 while (entry != NULL) { 750 next 
= entry->nq_next; 751 kmem_free(entry, sizeof (cpupm_notification_t)); 752 entry = next; 753 } 754 mach_state->ms_handlers = NULL; 755 mutex_exit(&mach_state->ms_lock); 756 #endif 757 } 758 759 /* 760 * Get the current max speed from the ACPI _PPC object 761 */ 762 /*ARGSUSED*/ 763 int 764 cpupm_get_top_speed(cpu_t *cp) 765 { 766 #ifndef __xpv 767 cpupm_mach_state_t *mach_state; 768 cpu_acpi_handle_t handle; 769 int plat_level; 770 uint_t nspeeds; 771 int max_level; 772 773 mach_state = 774 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 775 handle = mach_state->ms_acpi_handle; 776 777 cpu_acpi_cache_ppc(handle); 778 plat_level = CPU_ACPI_PPC(handle); 779 780 nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 781 782 max_level = nspeeds - 1; 783 if ((plat_level < 0) || (plat_level > max_level)) { 784 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 785 "_PPC out of range %d", cp->cpu_id, plat_level); 786 plat_level = 0; 787 } 788 789 return (plat_level); 790 #else 791 return (0); 792 #endif 793 } 794 795 /* 796 * This notification handler is called whenever the ACPI _PPC 797 * object changes. The _PPC is a sort of governor on power levels. 798 * It sets an upper threshold on which, _PSS defined, power levels 799 * are usuable. The _PPC value is dynamic and may change as properties 800 * (i.e., thermal or AC source) of the system change. 801 */ 802 803 static void 804 cpupm_power_manage_notifications(void *ctx) 805 { 806 cpu_t *cp = ctx; 807 int top_speed; 808 809 top_speed = cpupm_get_top_speed(cp); 810 cpupm_redefine_max_activepwr_state(cp, top_speed); 811 } 812 813 /* ARGSUSED */ 814 static void 815 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 816 { 817 #ifndef __xpv 818 819 cpu_t *cp = ctx; 820 cpupm_mach_state_t *mach_state = 821 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 822 823 if (mach_state == NULL) 824 return; 825 826 /* 827 * Currently, we handle _TPC,_CST and _PPC change notifications. 
828 */ 829 if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 830 mach_state->ms_caps & CPUPM_T_STATES) { 831 cpupm_throttle_manage_notification(ctx); 832 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 833 mach_state->ms_caps & CPUPM_C_STATES) { 834 cpuidle_manage_cstates(ctx); 835 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 836 mach_state->ms_caps & CPUPM_P_STATES) { 837 cpupm_power_manage_notifications(ctx); 838 } 839 #endif 840 } 841 842 /* 843 * Update cpupm cstate data each time CPU exits idle. 844 */ 845 void 846 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end) 847 { 848 cs_data->cs_idle_exit = end; 849 } 850 851 /* 852 * Determine next cstate based on cpupm data. 853 * Update cpupm cstate data each time CPU goes idle. 854 * Do as much as possible in the idle state bookkeeping function because the 855 * performance impact while idle is minimal compared to in the wakeup function 856 * when there is real work to do. 857 */ 858 uint32_t 859 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates, 860 uint32_t cs_count, hrtime_t start) 861 { 862 hrtime_t duration; 863 hrtime_t ave_interval; 864 hrtime_t ave_idle_time; 865 uint32_t i; 866 867 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter; 868 scalehrtime(&duration); 869 cs_data->cs_idle += duration; 870 cs_data->cs_idle_enter = start; 871 872 ++cs_data->cs_cnt; 873 if (cs_data->cs_cnt > cpupm_cs_sample_tunable) { 874 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start; 875 scalehrtime(&cs_data->cs_smpl_len); 876 cs_data->cs_smpl_len |= 1; /* protect from DIV 0 */ 877 cs_data->cs_smpl_idle = cs_data->cs_idle; 878 cs_data->cs_idle = 0; 879 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) / 880 cs_data->cs_smpl_len); 881 882 cs_data->cs_smpl_start = start; 883 cs_data->cs_cnt = 0; 884 885 /* 886 * Strand level C-state policy 887 * The cpu_acpi_cstate_t *cstates array is not required to 888 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3. 
889 * There are cs_count entries in the cstates array. 890 * cs_data->cs_next_cstate contains the index of the next 891 * C-state this CPU should enter. 892 */ 893 ASSERT(cstates[0].cs_type == CPU_ACPI_C1); 894 895 /* 896 * Will CPU be idle long enough to save power? 897 */ 898 ave_idle_time = (cs_data->cs_smpl_idle / 899 cpupm_cs_sample_tunable) / 1000; 900 for (i = 1; i < cs_count; ++i) { 901 if (ave_idle_time < (cstates[i].cs_latency * 902 cpupm_cs_idle_save_tunable)) { 903 cs_count = i; 904 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 905 CPU, int, i); 906 } 907 } 908 909 /* 910 * Wakeup often (even when non-idle time is very short)? 911 * Some producer/consumer type loads fall into this category. 912 */ 913 ave_interval = (cs_data->cs_smpl_len / cpupm_cs_sample_tunable) 914 / 1000; 915 for (i = 1; i < cs_count; ++i) { 916 if (ave_interval <= (cstates[i].cs_latency * 917 cpupm_cs_idle_cost_tunable)) { 918 cs_count = i; 919 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 920 CPU, int, (CPU_MAX_CSTATES + i)); 921 } 922 } 923 924 /* 925 * Idle percent 926 */ 927 for (i = 1; i < cs_count; ++i) { 928 switch (cstates[i].cs_type) { 929 case CPU_ACPI_C2: 930 if (cs_data->cs_smpl_idle_pct < 931 cpupm_C2_idle_pct_tunable) { 932 cs_count = i; 933 DTRACE_PROBE2(cpupm__next__cstate, 934 cpu_t *, CPU, int, 935 ((2 * CPU_MAX_CSTATES) + i)); 936 } 937 break; 938 939 case CPU_ACPI_C3: 940 if (cs_data->cs_smpl_idle_pct < 941 cpupm_C3_idle_pct_tunable) { 942 cs_count = i; 943 DTRACE_PROBE2(cpupm__next__cstate, 944 cpu_t *, CPU, int, 945 ((2 * CPU_MAX_CSTATES) + i)); 946 } 947 break; 948 } 949 } 950 951 cs_data->cs_next_cstate = cs_count - 1; 952 } 953 954 return (cs_data->cs_next_cstate); 955 } 956