1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/cpu_pm.h> 31 #include <sys/x86_archext.h> 32 #include <sys/sdt.h> 33 #include <sys/spl.h> 34 #include <sys/machsystm.h> 35 #include <sys/hpet.h> 36 #include <sys/cpupm.h> 37 #include <sys/cpu_idle.h> 38 #include <sys/cpu_acpi.h> 39 #include <sys/cpupm_throttle.h> 40 #include <sys/dtrace.h> 41 42 /* 43 * This callback is used to build the PPM CPU domains once 44 * all the CPU devices have been started. The callback is 45 * initialized by the PPM driver to point to a routine that 46 * will build the domains. 47 */ 48 void (*cpupm_rebuild_cpu_domains)(void); 49 50 /* 51 * This callback is used to reset the topspeed for all the 52 * CPU devices. The callback is initialized by the PPM driver to 53 * point to a routine that will reinitialize all the CPU devices 54 * once all the CPU devices have been started and the CPU domains 55 * built. 
*/
void (*cpupm_init_topspeed)(void);

/*
 * This callback is used to redefine the topspeed for a CPU device.
 * Since all CPUs in a domain should have identical properties, this
 * callback is initialized by the PPM driver to point to a routine
 * that will redefine the topspeed for all devices in a CPU domain.
 * This callback is exercised whenever an ACPI _PPC change notification
 * is received by the CPU driver.
 */
void (*cpupm_redefine_topspeed)(void *);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to set a new topspeed for a CPU.
 */
void (*cpupm_set_topspeed_callb)(void *, int);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 */
int (*cpupm_get_topspeed_callb)(void *);

/*
 * Forward declarations for file-local helpers that are used before
 * they are defined.
 */
static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
static void cpupm_free_notify_handlers(cpu_t *);

/*
 * Until proven otherwise, all power states are manageable.
 * Bits are cleared by cpupm_disable().
 */
static uint32_t cpupm_enabled = CPUPM_ALL_STATES;

/*
 * Until all CPUs have started, we do not allow
 * power management.  Set to B_TRUE by cpupm_post_startup().
 */
static boolean_t cpupm_ready = B_FALSE;

/*
 * Heads of the P-, T- and C-state power domain lists.  Entries are
 * linked in by cpupm_alloc_domains() and released by cpupm_free_domains().
 */
cpupm_state_domains_t *cpupm_pstate_domains = NULL;
cpupm_state_domains_t *cpupm_tstate_domains = NULL;
cpupm_state_domains_t *cpupm_cstate_domains = NULL;

/*
 * c-state tunables
 *
 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
 * divided by time spent in the idle state transitions.
 * A value of 10 means the CPU will not spend more than 1/10 of its time
 * in idle latency.  The worst case performance will be 90% of non Deep C-state
 * kernel.
 *
 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
 * before it is worth going there.  Expressed as a multiple of latency.
110 */ 111 uint32_t cpupm_cs_sample_tunable = 5; /* samples in decision period */ 112 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 113 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 114 uint16_t cpupm_C2_idle_pct_tunable = 70; 115 uint16_t cpupm_C3_idle_pct_tunable = 80; 116 117 #ifndef __xpv 118 extern boolean_t cpupm_intel_init(cpu_t *); 119 extern boolean_t cpupm_amd_init(cpu_t *); 120 121 typedef struct cpupm_vendor { 122 boolean_t (*cpuv_init)(cpu_t *); 123 } cpupm_vendor_t; 124 125 /* 126 * Table of supported vendors. 127 */ 128 static cpupm_vendor_t cpupm_vendors[] = { 129 cpupm_intel_init, 130 cpupm_amd_init, 131 NULL 132 }; 133 #endif 134 135 /* 136 * Initialize the machine. 137 * See if a module exists for managing power for this CPU. 138 */ 139 /*ARGSUSED*/ 140 void 141 cpupm_init(cpu_t *cp) 142 { 143 #ifndef __xpv 144 cpupm_vendor_t *vendors; 145 cpupm_mach_state_t *mach_state; 146 struct machcpu *mcpu = &(cp->cpu_m); 147 int *speeds; 148 uint_t nspeeds; 149 int ret; 150 151 mach_state = cp->cpu_m.mcpu_pm_mach_state = 152 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 153 mach_state->ms_caps = CPUPM_NO_STATES; 154 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 155 156 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 157 if (mach_state->ms_acpi_handle == NULL) { 158 cpupm_free(cp); 159 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 160 "unable to get ACPI handle", cp->cpu_id); 161 cmn_err(CE_NOTE, "!CPU power management will not function."); 162 CPUPM_DISABLE(); 163 return; 164 } 165 166 /* 167 * Loop through the CPU management module table and see if 168 * any of the modules implement CPU power management 169 * for this CPU. 170 */ 171 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 172 if (vendors->cpuv_init(cp)) 173 break; 174 } 175 176 /* 177 * Nope, we can't power manage this CPU. 
178 */ 179 if (vendors == NULL) { 180 cpupm_free(cp); 181 CPUPM_DISABLE(); 182 return; 183 } 184 185 /* 186 * If P-state support exists for this system, then initialize it. 187 */ 188 if (mach_state->ms_pstate.cma_ops != NULL) { 189 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 190 if (ret != 0) { 191 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 192 " unable to initialize P-state support", 193 cp->cpu_id); 194 mach_state->ms_pstate.cma_ops = NULL; 195 cpupm_disable(CPUPM_P_STATES); 196 } else { 197 nspeeds = cpupm_get_speeds(cp, &speeds); 198 if (nspeeds == 0) { 199 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 200 " no speeds to manage", cp->cpu_id); 201 } else { 202 cpupm_set_supp_freqs(cp, speeds, nspeeds); 203 cpupm_free_speeds(speeds, nspeeds); 204 mach_state->ms_caps |= CPUPM_P_STATES; 205 } 206 } 207 } 208 209 if (mach_state->ms_tstate.cma_ops != NULL) { 210 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 211 if (ret != 0) { 212 char err_msg[128]; 213 int p_res; 214 p_res = snprintf(err_msg, sizeof (err_msg), 215 "cpupm_init: processor %d: unable to initialize " 216 "T-state support", cp->cpu_id); 217 if (p_res >= 0) 218 DTRACE_PROBE1(cpu_ts_err_msg, char *, err_msg); 219 mach_state->ms_tstate.cma_ops = NULL; 220 cpupm_disable(CPUPM_T_STATES); 221 } else { 222 mach_state->ms_caps |= CPUPM_T_STATES; 223 } 224 } 225 226 /* 227 * If C-states support exists for this system, then initialize it. 
228 */ 229 if (mach_state->ms_cstate.cma_ops != NULL) { 230 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 231 if (ret != 0) { 232 cmn_err(CE_WARN, "!cpupm_init: processor %d:" 233 " unable to initialize C-state support", 234 cp->cpu_id); 235 mach_state->ms_cstate.cma_ops = NULL; 236 mcpu->max_cstates = CPU_ACPI_C1; 237 cpupm_disable(CPUPM_C_STATES); 238 idle_cpu = non_deep_idle_cpu; 239 disp_enq_thread = non_deep_idle_disp_enq_thread; 240 } else if (cpu_deep_cstates_supported()) { 241 mcpu->max_cstates = cpu_acpi_get_max_cstates( 242 mach_state->ms_acpi_handle); 243 if (mcpu->max_cstates > CPU_ACPI_C1) { 244 (void) cstate_timer_callback( 245 CST_EVENT_MULTIPLE_CSTATES); 246 CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 247 mcpu->mcpu_idle_type = CPU_ACPI_C1; 248 disp_enq_thread = cstate_wakeup; 249 } else { 250 (void) cstate_timer_callback( 251 CST_EVENT_ONE_CSTATE); 252 } 253 mach_state->ms_caps |= CPUPM_C_STATES; 254 } else { 255 mcpu->max_cstates = CPU_ACPI_C1; 256 idle_cpu = non_deep_idle_cpu; 257 disp_enq_thread = non_deep_idle_disp_enq_thread; 258 } 259 } 260 261 262 if (mach_state->ms_caps == CPUPM_NO_STATES) { 263 cpupm_free(cp); 264 CPUPM_DISABLE(); 265 return; 266 } 267 268 if ((mach_state->ms_caps & CPUPM_T_STATES) || 269 (mach_state->ms_caps & CPUPM_P_STATES) || 270 (mach_state->ms_caps & CPUPM_C_STATES)) 271 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 272 #endif 273 } 274 275 /* 276 * Free any resources allocated by cpupm_init(). 
277 */ 278 /*ARGSUSED*/ 279 void 280 cpupm_free(cpu_t *cp) 281 { 282 #ifndef __xpv 283 cpupm_mach_state_t *mach_state = 284 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 285 286 if (mach_state == NULL) 287 return; 288 if (mach_state->ms_pstate.cma_ops != NULL) { 289 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 290 mach_state->ms_pstate.cma_ops = NULL; 291 } 292 293 if (mach_state->ms_tstate.cma_ops != NULL) { 294 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 295 mach_state->ms_tstate.cma_ops = NULL; 296 } 297 298 if (mach_state->ms_cstate.cma_ops != NULL) { 299 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 300 mach_state->ms_cstate.cma_ops = NULL; 301 } 302 303 cpupm_free_notify_handlers(cp); 304 305 if (mach_state->ms_acpi_handle != NULL) { 306 cpu_acpi_fini(mach_state->ms_acpi_handle); 307 mach_state->ms_acpi_handle = NULL; 308 } 309 310 mutex_destroy(&mach_state->ms_lock); 311 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 312 cp->cpu_m.mcpu_pm_mach_state = NULL; 313 #endif 314 } 315 316 /* 317 * If all CPUs have started and at least one power state is manageable, 318 * then the CPUs are ready for power management. 319 */ 320 boolean_t 321 cpupm_is_ready() 322 { 323 #ifndef __xpv 324 if (cpupm_enabled == CPUPM_NO_STATES) 325 return (B_FALSE); 326 return (cpupm_ready); 327 #else 328 return (B_FALSE); 329 #endif 330 331 } 332 333 boolean_t 334 cpupm_is_enabled(uint32_t state) 335 { 336 return ((cpupm_enabled & state) == state); 337 } 338 339 /* 340 * By default, all states are enabled. 
341 */ 342 void 343 cpupm_disable(uint32_t state) 344 { 345 346 if (state & CPUPM_P_STATES) { 347 cpupm_free_domains(&cpupm_pstate_domains); 348 } 349 if (state & CPUPM_T_STATES) { 350 cpupm_free_domains(&cpupm_tstate_domains); 351 } 352 if (state & CPUPM_C_STATES) { 353 cpupm_free_domains(&cpupm_cstate_domains); 354 } 355 cpupm_enabled &= ~state; 356 } 357 358 /* 359 * Once all CPUs have been started, the PPM driver should build CPU 360 * domains and initialize the topspeed for all CPU devices. 361 */ 362 void 363 cpupm_post_startup() 364 { 365 #ifndef __xpv 366 /* 367 * The CPU domain built by the PPM during CPUs attaching 368 * should be rebuilt with the information retrieved from 369 * ACPI. 370 */ 371 if (cpupm_rebuild_cpu_domains != NULL) 372 (*cpupm_rebuild_cpu_domains)(); 373 374 /* 375 * Only initialize the topspeed if P-states are enabled. 376 */ 377 if (cpupm_enabled & CPUPM_P_STATES && cpupm_init_topspeed != NULL) 378 (*cpupm_init_topspeed)(); 379 #endif 380 cpupm_ready = B_TRUE; 381 } 382 383 /* 384 * Allocate power domains for C,P and T States 385 */ 386 void 387 cpupm_alloc_domains(cpu_t *cp, int state) 388 { 389 cpupm_mach_state_t *mach_state = 390 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 391 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 392 cpupm_state_domains_t **dom_ptr; 393 cpupm_state_domains_t *dptr; 394 cpupm_state_domains_t **mach_dom_state_ptr; 395 uint32_t domain; 396 uint32_t type; 397 398 switch (state) { 399 case CPUPM_P_STATES: 400 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 401 domain = CPU_ACPI_PSD(handle).sd_domain; 402 type = CPU_ACPI_PSD(handle).sd_type; 403 } else { 404 mutex_enter(&cpu_lock); 405 domain = cpuid_get_chipid(cp); 406 mutex_exit(&cpu_lock); 407 type = CPU_ACPI_HW_ALL; 408 } 409 dom_ptr = &cpupm_pstate_domains; 410 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 411 break; 412 case CPUPM_T_STATES: 413 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 414 domain = 
CPU_ACPI_TSD(handle).sd_domain; 415 type = CPU_ACPI_TSD(handle).sd_type; 416 } else { 417 mutex_enter(&cpu_lock); 418 domain = cpuid_get_chipid(cp); 419 mutex_exit(&cpu_lock); 420 type = CPU_ACPI_HW_ALL; 421 } 422 dom_ptr = &cpupm_tstate_domains; 423 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 424 break; 425 case CPUPM_C_STATES: 426 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 427 domain = CPU_ACPI_CSD(handle).sd_domain; 428 type = CPU_ACPI_CSD(handle).sd_type; 429 } else { 430 mutex_enter(&cpu_lock); 431 domain = cpuid_get_coreid(cp); 432 mutex_exit(&cpu_lock); 433 type = CPU_ACPI_HW_ALL; 434 } 435 dom_ptr = &cpupm_cstate_domains; 436 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 437 break; 438 default: 439 return; 440 } 441 442 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 443 if (dptr->pm_domain == domain) 444 break; 445 } 446 447 /* new domain is created and linked at the head */ 448 if (dptr == NULL) { 449 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 450 dptr->pm_domain = domain; 451 dptr->pm_type = type; 452 dptr->pm_next = *dom_ptr; 453 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 454 (void *)ipltospl(DISP_LEVEL)); 455 CPUSET_ZERO(dptr->pm_cpus); 456 *dom_ptr = dptr; 457 } 458 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 459 *mach_dom_state_ptr = dptr; 460 } 461 462 /* 463 * Free C, P or T state power domains 464 */ 465 void 466 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 467 { 468 cpupm_state_domains_t *this_domain, *next_domain; 469 470 this_domain = *dom_ptr; 471 while (this_domain != NULL) { 472 next_domain = this_domain->pm_next; 473 mutex_destroy(&this_domain->pm_lock); 474 kmem_free((void *)this_domain, 475 sizeof (cpupm_state_domains_t)); 476 this_domain = next_domain; 477 } 478 *dom_ptr = NULL; 479 } 480 481 void 482 cpupm_alloc_ms_cstate(cpu_t *cp) 483 { 484 cpupm_mach_state_t *mach_state; 485 cpupm_mach_acpi_state_t *ms_cstate; 486 487 mach_state = (cpupm_mach_state_t 
*)(cp->cpu_m.mcpu_pm_mach_state); 488 ms_cstate = &mach_state->ms_cstate; 489 ASSERT(ms_cstate->cma_state.cstate == NULL); 490 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 491 KM_SLEEP); 492 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 493 } 494 495 void 496 cpupm_free_ms_cstate(cpu_t *cp) 497 { 498 cpupm_mach_state_t *mach_state = 499 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 500 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 501 502 if (ms_cstate->cma_state.cstate != NULL) { 503 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 504 ms_cstate->cma_state.cstate = NULL; 505 } 506 } 507 508 void 509 cpupm_state_change(cpu_t *cp, int level, int state) 510 { 511 cpupm_mach_state_t *mach_state = 512 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 513 cpupm_state_ops_t *state_ops; 514 cpupm_state_domains_t *state_domain; 515 cpuset_t set; 516 517 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 518 519 if (mach_state == NULL) { 520 return; 521 } 522 523 switch (state) { 524 case CPUPM_P_STATES: 525 state_ops = mach_state->ms_pstate.cma_ops; 526 state_domain = mach_state->ms_pstate.cma_domain; 527 break; 528 case CPUPM_T_STATES: 529 state_ops = mach_state->ms_tstate.cma_ops; 530 state_domain = mach_state->ms_tstate.cma_domain; 531 break; 532 default: 533 break; 534 } 535 536 switch (state_domain->pm_type) { 537 case CPU_ACPI_SW_ANY: 538 /* 539 * A request on any CPU in the domain transitions the domain 540 */ 541 CPUSET_ONLY(set, cp->cpu_id); 542 state_ops->cpus_change(set, level); 543 break; 544 case CPU_ACPI_SW_ALL: 545 /* 546 * All CPUs in the domain must request the transition 547 */ 548 case CPU_ACPI_HW_ALL: 549 /* 550 * P/T-state transitions are coordinated by the hardware 551 * For now, request the transition on all CPUs in the domain, 552 * but looking ahead we can probably be smarter about this. 
553 */ 554 mutex_enter(&state_domain->pm_lock); 555 state_ops->cpus_change(state_domain->pm_cpus, level); 556 mutex_exit(&state_domain->pm_lock); 557 break; 558 default: 559 cmn_err(CE_WARN, "Unknown domain coordination type: %d", 560 state_domain->pm_type); 561 } 562 } 563 564 /* 565 * CPU PM interfaces exposed to the CPU power manager 566 */ 567 /*ARGSUSED*/ 568 id_t 569 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 570 { 571 cpupm_mach_state_t *mach_state = 572 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 573 574 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 575 !cpupm_is_enabled(CPUPM_C_STATES))) { 576 return (CPUPM_NO_DOMAIN); 577 } 578 if (type == CPUPM_DTYPE_ACTIVE) { 579 /* 580 * Return P-State domain for the specified CPU 581 */ 582 if (mach_state->ms_pstate.cma_domain) { 583 return (mach_state->ms_pstate.cma_domain->pm_domain); 584 } 585 } else if (type == CPUPM_DTYPE_IDLE) { 586 /* 587 * Return C-State domain for the specified CPU 588 */ 589 if (mach_state->ms_cstate.cma_domain) { 590 return (mach_state->ms_cstate.cma_domain->pm_domain); 591 } 592 } 593 return (CPUPM_NO_DOMAIN); 594 } 595 596 /*ARGSUSED*/ 597 uint_t 598 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 599 cpupm_state_t *states) 600 { 601 int *speeds; 602 uint_t nspeeds, i; 603 604 /* 605 * Idle domain support unimplemented 606 */ 607 if (type != CPUPM_DTYPE_ACTIVE) { 608 return (0); 609 } 610 nspeeds = cpupm_get_speeds(cp, &speeds); 611 612 /* 613 * If the caller passes NULL for states, just return the 614 * number of states. 
615 */ 616 if (states != NULL) { 617 for (i = 0; i < nspeeds; i++) { 618 states[i].cps_speed = speeds[i]; 619 states[i].cps_handle = (cpupm_handle_t)i; 620 } 621 } 622 cpupm_free_speeds(speeds, nspeeds); 623 return (nspeeds); 624 } 625 626 /*ARGSUSED*/ 627 int 628 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 629 { 630 if (!cpupm_is_ready()) 631 return (-1); 632 633 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 634 635 return (0); 636 } 637 638 /*ARGSUSED*/ 639 /* 640 * Note: It is the responsibility of the users of 641 * cpupm_get_speeds() to free the memory allocated 642 * for speeds using cpupm_free_speeds() 643 */ 644 uint_t 645 cpupm_get_speeds(cpu_t *cp, int **speeds) 646 { 647 #ifndef __xpv 648 cpupm_mach_state_t *mach_state = 649 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 650 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 651 #else 652 return (0); 653 #endif 654 } 655 656 /*ARGSUSED*/ 657 void 658 cpupm_free_speeds(int *speeds, uint_t nspeeds) 659 { 660 #ifndef __xpv 661 cpu_acpi_free_speeds(speeds, nspeeds); 662 #endif 663 } 664 665 /* 666 * All CPU instances have been initialized successfully. 667 */ 668 boolean_t 669 cpupm_power_ready(void) 670 { 671 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready()); 672 } 673 674 /* 675 * All CPU instances have been initialized successfully. 676 */ 677 boolean_t 678 cpupm_throttle_ready(void) 679 { 680 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready()); 681 } 682 683 /* 684 * All CPU instances have been initialized successfully. 
685 */ 686 boolean_t 687 cpupm_cstate_ready(void) 688 { 689 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready()); 690 } 691 692 void 693 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 694 { 695 cpu_t *cp = ctx; 696 cpupm_mach_state_t *mach_state = 697 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 698 cpupm_notification_t *entry; 699 700 mutex_enter(&mach_state->ms_lock); 701 for (entry = mach_state->ms_handlers; entry != NULL; 702 entry = entry->nq_next) { 703 entry->nq_handler(obj, val, entry->nq_ctx); 704 } 705 mutex_exit(&mach_state->ms_lock); 706 } 707 708 /*ARGSUSED*/ 709 void 710 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 711 { 712 #ifndef __xpv 713 cpupm_mach_state_t *mach_state = 714 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 715 cpupm_notification_t *entry; 716 717 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 718 entry->nq_handler = handler; 719 entry->nq_ctx = ctx; 720 mutex_enter(&mach_state->ms_lock); 721 if (mach_state->ms_handlers == NULL) { 722 entry->nq_next = NULL; 723 mach_state->ms_handlers = entry; 724 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 725 cpupm_notify_handler, cp); 726 727 } else { 728 entry->nq_next = mach_state->ms_handlers; 729 mach_state->ms_handlers = entry; 730 } 731 mutex_exit(&mach_state->ms_lock); 732 #endif 733 } 734 735 /*ARGSUSED*/ 736 static void 737 cpupm_free_notify_handlers(cpu_t *cp) 738 { 739 #ifndef __xpv 740 cpupm_mach_state_t *mach_state = 741 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 742 cpupm_notification_t *entry; 743 cpupm_notification_t *next; 744 745 mutex_enter(&mach_state->ms_lock); 746 if (mach_state->ms_handlers == NULL) { 747 mutex_exit(&mach_state->ms_lock); 748 return; 749 } 750 if (mach_state->ms_acpi_handle != NULL) { 751 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 752 cpupm_notify_handler); 753 } 754 entry = mach_state->ms_handlers; 755 while (entry != NULL) { 756 next 
= entry->nq_next; 757 kmem_free(entry, sizeof (cpupm_notification_t)); 758 entry = next; 759 } 760 mach_state->ms_handlers = NULL; 761 mutex_exit(&mach_state->ms_lock); 762 #endif 763 } 764 765 /* 766 * Get the current max speed from the ACPI _PPC object 767 */ 768 /*ARGSUSED*/ 769 int 770 cpupm_get_top_speed(cpu_t *cp) 771 { 772 #ifndef __xpv 773 cpupm_mach_state_t *mach_state; 774 cpu_acpi_handle_t handle; 775 int plat_level; 776 uint_t nspeeds; 777 int max_level; 778 779 mach_state = 780 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 781 handle = mach_state->ms_acpi_handle; 782 783 cpu_acpi_cache_ppc(handle); 784 plat_level = CPU_ACPI_PPC(handle); 785 786 nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 787 788 max_level = nspeeds - 1; 789 if ((plat_level < 0) || (plat_level > max_level)) { 790 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 791 "_PPC out of range %d", cp->cpu_id, plat_level); 792 plat_level = 0; 793 } 794 795 return (plat_level); 796 #else 797 return (0); 798 #endif 799 } 800 801 /* 802 * This notification handler is called whenever the ACPI _PPC 803 * object changes. The _PPC is a sort of governor on power levels. 804 * It sets an upper threshold on which, _PSS defined, power levels 805 * are usuable. The _PPC value is dynamic and may change as properties 806 * (i.e., thermal or AC source) of the system change. 807 */ 808 809 static void 810 cpupm_power_manage_notifications(void *ctx) 811 { 812 cpu_t *cp = ctx; 813 int top_speed; 814 815 top_speed = cpupm_get_top_speed(cp); 816 cpupm_redefine_max_activepwr_state(cp, top_speed); 817 } 818 819 /* ARGSUSED */ 820 static void 821 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 822 { 823 #ifndef __xpv 824 825 cpu_t *cp = ctx; 826 cpupm_mach_state_t *mach_state = 827 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 828 829 if (mach_state == NULL) 830 return; 831 832 /* 833 * Currently, we handle _TPC,_CST and _PPC change notifications. 
834 */ 835 if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 836 mach_state->ms_caps & CPUPM_T_STATES) { 837 cpupm_throttle_manage_notification(ctx); 838 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 839 mach_state->ms_caps & CPUPM_C_STATES) { 840 cpuidle_manage_cstates(ctx); 841 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 842 mach_state->ms_caps & CPUPM_P_STATES) { 843 cpupm_power_manage_notifications(ctx); 844 } 845 #endif 846 } 847 848 /* 849 * Update cpupm cstate data each time CPU exits idle. 850 */ 851 void 852 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end) 853 { 854 cs_data->cs_idle_exit = end; 855 } 856 857 /* 858 * Determine next cstate based on cpupm data. 859 * Update cpupm cstate data each time CPU goes idle. 860 * Do as much as possible in the idle state bookkeeping function because the 861 * performance impact while idle is minimal compared to in the wakeup function 862 * when there is real work to do. 863 */ 864 uint32_t 865 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates, 866 uint32_t cs_count, hrtime_t start) 867 { 868 hrtime_t duration; 869 hrtime_t ave_interval; 870 hrtime_t ave_idle_time; 871 uint32_t i; 872 873 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter; 874 scalehrtime(&duration); 875 cs_data->cs_idle += duration; 876 cs_data->cs_idle_enter = start; 877 878 ++cs_data->cs_cnt; 879 if (cs_data->cs_cnt > cpupm_cs_sample_tunable) { 880 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start; 881 scalehrtime(&cs_data->cs_smpl_len); 882 cs_data->cs_smpl_len |= 1; /* protect from DIV 0 */ 883 cs_data->cs_smpl_idle = cs_data->cs_idle; 884 cs_data->cs_idle = 0; 885 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) / 886 cs_data->cs_smpl_len); 887 888 cs_data->cs_smpl_start = start; 889 cs_data->cs_cnt = 0; 890 891 /* 892 * Strand level C-state policy 893 * The cpu_acpi_cstate_t *cstates array is not required to 894 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3. 
895 * There are cs_count entries in the cstates array. 896 * cs_data->cs_next_cstate contains the index of the next 897 * C-state this CPU should enter. 898 */ 899 ASSERT(cstates[0].cs_type == CPU_ACPI_C1); 900 901 /* 902 * Will CPU be idle long enough to save power? 903 */ 904 ave_idle_time = (cs_data->cs_smpl_idle / 905 cpupm_cs_sample_tunable) / 1000; 906 for (i = 1; i < cs_count; ++i) { 907 if (ave_idle_time < (cstates[i].cs_latency * 908 cpupm_cs_idle_save_tunable)) { 909 cs_count = i; 910 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 911 CPU, int, i); 912 } 913 } 914 915 /* 916 * Wakeup often (even when non-idle time is very short)? 917 * Some producer/consumer type loads fall into this category. 918 */ 919 ave_interval = (cs_data->cs_smpl_len / cpupm_cs_sample_tunable) 920 / 1000; 921 for (i = 1; i < cs_count; ++i) { 922 if (ave_interval <= (cstates[i].cs_latency * 923 cpupm_cs_idle_cost_tunable)) { 924 cs_count = i; 925 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 926 CPU, int, (CPU_MAX_CSTATES + i)); 927 } 928 } 929 930 /* 931 * Idle percent 932 */ 933 for (i = 1; i < cs_count; ++i) { 934 switch (cstates[i].cs_type) { 935 case CPU_ACPI_C2: 936 if (cs_data->cs_smpl_idle_pct < 937 cpupm_C2_idle_pct_tunable) { 938 cs_count = i; 939 DTRACE_PROBE2(cpupm__next__cstate, 940 cpu_t *, CPU, int, 941 ((2 * CPU_MAX_CSTATES) + i)); 942 } 943 break; 944 945 case CPU_ACPI_C3: 946 if (cs_data->cs_smpl_idle_pct < 947 cpupm_C3_idle_pct_tunable) { 948 cs_count = i; 949 DTRACE_PROBE2(cpupm__next__cstate, 950 cpu_t *, CPU, int, 951 ((2 * CPU_MAX_CSTATES) + i)); 952 } 953 break; 954 } 955 } 956 957 cs_data->cs_next_cstate = cs_count - 1; 958 } 959 960 return (cs_data->cs_next_cstate); 961 } 962