1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/cpu_pm.h> 31 #include <sys/x86_archext.h> 32 #include <sys/sdt.h> 33 #include <sys/spl.h> 34 #include <sys/machsystm.h> 35 #include <sys/hpet.h> 36 #include <sys/acpi/acpi.h> 37 #include <sys/acpica.h> 38 #include <sys/cpupm.h> 39 #include <sys/cpu_idle.h> 40 #include <sys/cpu_acpi.h> 41 #include <sys/cpupm_throttle.h> 42 #include <sys/dtrace.h> 43 44 /* 45 * This callback is used to build the PPM CPU domains once 46 * all the CPU devices have been started. The callback is 47 * initialized by the PPM driver to point to a routine that 48 * will build the domains. 49 */ 50 void (*cpupm_rebuild_cpu_domains)(void); 51 52 /* 53 * This callback is used to reset the topspeed for all the 54 * CPU devices. The callback is initialized by the PPM driver to 55 * point to a routine that will reinitialize all the CPU devices 56 * once all the CPU devices have been started and the CPU domains 57 * built. 58 */ 59 void (*cpupm_init_topspeed)(void); 60 61 /* 62 * This callback is used to redefine the topspeed for a CPU device. 63 * Since all CPUs in a domain should have identical properties, this 64 * callback is initialized by the PPM driver to point to a routine 65 * that will redefine the topspeed for all devices in a CPU domain. 66 * This callback is exercised whenever an ACPI _PPC change notification 67 * is received by the CPU driver. 68 */ 69 void (*cpupm_redefine_topspeed)(void *); 70 71 /* 72 * This callback is used by the PPM driver to call into the CPU driver 73 * to find a CPU's current topspeed (i.e., it's current ACPI _PPC value). 74 */ 75 void (*cpupm_set_topspeed_callb)(void *, int); 76 77 /* 78 * This callback is used by the PPM driver to call into the CPU driver 79 * to set a new topspeed for a CPU. 80 */ 81 int (*cpupm_get_topspeed_callb)(void *); 82 83 static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *); 84 static void cpupm_free_notify_handlers(cpu_t *); 85 86 /* 87 * Until proven otherwise, all power states are manageable. 88 */ 89 static uint32_t cpupm_enabled = CPUPM_ALL_STATES; 90 91 /* 92 * Until all CPUs have started, we do not allow 93 * power management. 94 */ 95 static boolean_t cpupm_ready = B_FALSE; 96 97 cpupm_state_domains_t *cpupm_pstate_domains = NULL; 98 cpupm_state_domains_t *cpupm_tstate_domains = NULL; 99 cpupm_state_domains_t *cpupm_cstate_domains = NULL; 100 101 /* 102 * c-state tunables 103 * 104 * cpupm_cs_sample_interval is the length of time we wait before 105 * recalculating c-state statistics. When a CPU goes idle it checks 106 * to see if it has been longer than cpupm_cs_sample_interval since it last 107 * caculated which C-state to go to. 108 * 109 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle 110 * divided by time spent in the idle state transitions. 111 * A value of 10 means the CPU will not spend more than 1/10 of its time 112 * in idle latency. The worst case performance will be 90% of non Deep C-state 113 * kernel. 114 * 115 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state 116 * before it is worth going there. Expressed as a multiple of latency. 117 */ 118 uint32_t cpupm_cs_sample_interval = 100*1000*1000; /* 100 milliseconds */ 119 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 120 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 121 uint16_t cpupm_C2_idle_pct_tunable = 70; 122 uint16_t cpupm_C3_idle_pct_tunable = 80; 123 124 #ifndef __xpv 125 extern boolean_t cpupm_intel_init(cpu_t *); 126 extern boolean_t cpupm_amd_init(cpu_t *); 127 128 typedef struct cpupm_vendor { 129 boolean_t (*cpuv_init)(cpu_t *); 130 } cpupm_vendor_t; 131 132 /* 133 * Table of supported vendors. 134 */ 135 static cpupm_vendor_t cpupm_vendors[] = { 136 cpupm_intel_init, 137 cpupm_amd_init, 138 NULL 139 }; 140 #endif 141 142 /* 143 * Initialize the machine. 144 * See if a module exists for managing power for this CPU. 145 */ 146 /*ARGSUSED*/ 147 void 148 cpupm_init(cpu_t *cp) 149 { 150 #ifndef __xpv 151 cpupm_vendor_t *vendors; 152 cpupm_mach_state_t *mach_state; 153 struct machcpu *mcpu = &(cp->cpu_m); 154 static boolean_t first = B_TRUE; 155 int *speeds; 156 uint_t nspeeds; 157 int ret; 158 159 mach_state = cp->cpu_m.mcpu_pm_mach_state = 160 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 161 mach_state->ms_caps = CPUPM_NO_STATES; 162 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 163 164 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 165 if (mach_state->ms_acpi_handle == NULL) { 166 cpupm_free(cp); 167 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 168 "unable to get ACPI handle", cp->cpu_id); 169 cmn_err(CE_NOTE, "!CPU power management will not function."); 170 CPUPM_DISABLE(); 171 first = B_FALSE; 172 return; 173 } 174 175 /* 176 * Loop through the CPU management module table and see if 177 * any of the modules implement CPU power management 178 * for this CPU. 179 */ 180 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 181 if (vendors->cpuv_init(cp)) 182 break; 183 } 184 185 /* 186 * Nope, we can't power manage this CPU. 187 */ 188 if (vendors == NULL) { 189 cpupm_free(cp); 190 CPUPM_DISABLE(); 191 first = B_FALSE; 192 return; 193 } 194 195 /* 196 * If P-state support exists for this system, then initialize it. 197 */ 198 if (mach_state->ms_pstate.cma_ops != NULL) { 199 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 200 if (ret != 0) { 201 mach_state->ms_pstate.cma_ops = NULL; 202 cpupm_disable(CPUPM_P_STATES); 203 } else { 204 nspeeds = cpupm_get_speeds(cp, &speeds); 205 if (nspeeds == 0) { 206 cmn_err(CE_NOTE, "!cpupm_init: processor %d:" 207 " no speeds to manage", cp->cpu_id); 208 } else { 209 cpupm_set_supp_freqs(cp, speeds, nspeeds); 210 cpupm_free_speeds(speeds, nspeeds); 211 mach_state->ms_caps |= CPUPM_P_STATES; 212 } 213 } 214 } 215 216 if (mach_state->ms_tstate.cma_ops != NULL) { 217 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 218 if (ret != 0) { 219 mach_state->ms_tstate.cma_ops = NULL; 220 cpupm_disable(CPUPM_T_STATES); 221 } else { 222 mach_state->ms_caps |= CPUPM_T_STATES; 223 } 224 } 225 226 /* 227 * If C-states support exists for this system, then initialize it. 228 */ 229 if (mach_state->ms_cstate.cma_ops != NULL) { 230 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 231 if (ret != 0) { 232 mach_state->ms_cstate.cma_ops = NULL; 233 mcpu->max_cstates = CPU_ACPI_C1; 234 cpupm_disable(CPUPM_C_STATES); 235 idle_cpu = non_deep_idle_cpu; 236 disp_enq_thread = non_deep_idle_disp_enq_thread; 237 } else if (cpu_deep_cstates_supported()) { 238 mcpu->max_cstates = cpu_acpi_get_max_cstates( 239 mach_state->ms_acpi_handle); 240 if (mcpu->max_cstates > CPU_ACPI_C1) { 241 (void) cstate_timer_callback( 242 CST_EVENT_MULTIPLE_CSTATES); 243 CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 244 mcpu->mcpu_idle_type = CPU_ACPI_C1; 245 disp_enq_thread = cstate_wakeup; 246 } else { 247 (void) cstate_timer_callback( 248 CST_EVENT_ONE_CSTATE); 249 } 250 mach_state->ms_caps |= CPUPM_C_STATES; 251 } else { 252 mcpu->max_cstates = CPU_ACPI_C1; 253 idle_cpu = non_deep_idle_cpu; 254 disp_enq_thread = non_deep_idle_disp_enq_thread; 255 } 256 } 257 258 259 if (mach_state->ms_caps == CPUPM_NO_STATES) { 260 cpupm_free(cp); 261 CPUPM_DISABLE(); 262 first = B_FALSE; 263 return; 264 } 265 266 if ((mach_state->ms_caps & CPUPM_T_STATES) || 267 (mach_state->ms_caps & CPUPM_P_STATES) || 268 (mach_state->ms_caps & CPUPM_C_STATES)) { 269 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 270 if (first) { 271 acpica_write_cpupm_capabilities( 272 mach_state->ms_caps & CPUPM_P_STATES, 273 mach_state->ms_caps & CPUPM_C_STATES); 274 } 275 } 276 first = B_FALSE; 277 #endif 278 } 279 280 /* 281 * Free any resources allocated by cpupm_init(). 282 */ 283 /*ARGSUSED*/ 284 void 285 cpupm_free(cpu_t *cp) 286 { 287 #ifndef __xpv 288 cpupm_mach_state_t *mach_state = 289 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 290 291 if (mach_state == NULL) 292 return; 293 if (mach_state->ms_pstate.cma_ops != NULL) { 294 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 295 mach_state->ms_pstate.cma_ops = NULL; 296 } 297 298 if (mach_state->ms_tstate.cma_ops != NULL) { 299 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 300 mach_state->ms_tstate.cma_ops = NULL; 301 } 302 303 if (mach_state->ms_cstate.cma_ops != NULL) { 304 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 305 mach_state->ms_cstate.cma_ops = NULL; 306 } 307 308 cpupm_free_notify_handlers(cp); 309 310 if (mach_state->ms_acpi_handle != NULL) { 311 cpu_acpi_fini(mach_state->ms_acpi_handle); 312 mach_state->ms_acpi_handle = NULL; 313 } 314 315 mutex_destroy(&mach_state->ms_lock); 316 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 317 cp->cpu_m.mcpu_pm_mach_state = NULL; 318 #endif 319 } 320 321 /* 322 * If all CPUs have started and at least one power state is manageable, 323 * then the CPUs are ready for power management. 324 */ 325 boolean_t 326 cpupm_is_ready() 327 { 328 #ifndef __xpv 329 if (cpupm_enabled == CPUPM_NO_STATES) 330 return (B_FALSE); 331 return (cpupm_ready); 332 #else 333 return (B_FALSE); 334 #endif 335 336 } 337 338 boolean_t 339 cpupm_is_enabled(uint32_t state) 340 { 341 return ((cpupm_enabled & state) == state); 342 } 343 344 /* 345 * By default, all states are enabled. 346 */ 347 void 348 cpupm_disable(uint32_t state) 349 { 350 351 if (state & CPUPM_P_STATES) { 352 cpupm_free_domains(&cpupm_pstate_domains); 353 } 354 if (state & CPUPM_T_STATES) { 355 cpupm_free_domains(&cpupm_tstate_domains); 356 } 357 if (state & CPUPM_C_STATES) { 358 cpupm_free_domains(&cpupm_cstate_domains); 359 } 360 cpupm_enabled &= ~state; 361 } 362 363 /* 364 * Once all CPUs have been started, the PPM driver should build CPU 365 * domains and initialize the topspeed for all CPU devices. 366 */ 367 void 368 cpupm_post_startup() 369 { 370 #ifndef __xpv 371 /* 372 * The CPU domain built by the PPM during CPUs attaching 373 * should be rebuilt with the information retrieved from 374 * ACPI. 375 */ 376 if (cpupm_rebuild_cpu_domains != NULL) 377 (*cpupm_rebuild_cpu_domains)(); 378 379 /* 380 * Only initialize the topspeed if P-states are enabled. 381 */ 382 if (cpupm_enabled & CPUPM_P_STATES && cpupm_init_topspeed != NULL) 383 (*cpupm_init_topspeed)(); 384 #endif 385 cpupm_ready = B_TRUE; 386 } 387 388 /* 389 * Allocate power domains for C,P and T States 390 */ 391 void 392 cpupm_alloc_domains(cpu_t *cp, int state) 393 { 394 cpupm_mach_state_t *mach_state = 395 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 396 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 397 cpupm_state_domains_t **dom_ptr; 398 cpupm_state_domains_t *dptr; 399 cpupm_state_domains_t **mach_dom_state_ptr; 400 uint32_t domain; 401 uint32_t type; 402 403 switch (state) { 404 case CPUPM_P_STATES: 405 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 406 domain = CPU_ACPI_PSD(handle).sd_domain; 407 type = CPU_ACPI_PSD(handle).sd_type; 408 } else { 409 mutex_enter(&cpu_lock); 410 domain = cpuid_get_chipid(cp); 411 mutex_exit(&cpu_lock); 412 type = CPU_ACPI_HW_ALL; 413 } 414 dom_ptr = &cpupm_pstate_domains; 415 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 416 break; 417 case CPUPM_T_STATES: 418 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 419 domain = CPU_ACPI_TSD(handle).sd_domain; 420 type = CPU_ACPI_TSD(handle).sd_type; 421 } else { 422 mutex_enter(&cpu_lock); 423 domain = cpuid_get_chipid(cp); 424 mutex_exit(&cpu_lock); 425 type = CPU_ACPI_HW_ALL; 426 } 427 dom_ptr = &cpupm_tstate_domains; 428 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 429 break; 430 case CPUPM_C_STATES: 431 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 432 domain = CPU_ACPI_CSD(handle).sd_domain; 433 type = CPU_ACPI_CSD(handle).sd_type; 434 } else { 435 mutex_enter(&cpu_lock); 436 domain = cpuid_get_coreid(cp); 437 mutex_exit(&cpu_lock); 438 type = CPU_ACPI_HW_ALL; 439 } 440 dom_ptr = &cpupm_cstate_domains; 441 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 442 break; 443 default: 444 return; 445 } 446 447 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 448 if (dptr->pm_domain == domain) 449 break; 450 } 451 452 /* new domain is created and linked at the head */ 453 if (dptr == NULL) { 454 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 455 dptr->pm_domain = domain; 456 dptr->pm_type = type; 457 dptr->pm_next = *dom_ptr; 458 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 459 (void *)ipltospl(DISP_LEVEL)); 460 CPUSET_ZERO(dptr->pm_cpus); 461 *dom_ptr = dptr; 462 } 463 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 464 *mach_dom_state_ptr = dptr; 465 } 466 467 /* 468 * Free C, P or T state power domains 469 */ 470 void 471 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 472 { 473 cpupm_state_domains_t *this_domain, *next_domain; 474 475 this_domain = *dom_ptr; 476 while (this_domain != NULL) { 477 next_domain = this_domain->pm_next; 478 mutex_destroy(&this_domain->pm_lock); 479 kmem_free((void *)this_domain, 480 sizeof (cpupm_state_domains_t)); 481 this_domain = next_domain; 482 } 483 *dom_ptr = NULL; 484 } 485 486 void 487 cpupm_alloc_ms_cstate(cpu_t *cp) 488 { 489 cpupm_mach_state_t *mach_state; 490 cpupm_mach_acpi_state_t *ms_cstate; 491 492 mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 493 ms_cstate = &mach_state->ms_cstate; 494 ASSERT(ms_cstate->cma_state.cstate == NULL); 495 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 496 KM_SLEEP); 497 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 498 } 499 500 void 501 cpupm_free_ms_cstate(cpu_t *cp) 502 { 503 cpupm_mach_state_t *mach_state = 504 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 505 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 506 507 if (ms_cstate->cma_state.cstate != NULL) { 508 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 509 ms_cstate->cma_state.cstate = NULL; 510 } 511 } 512 513 void 514 cpupm_state_change(cpu_t *cp, int level, int state) 515 { 516 cpupm_mach_state_t *mach_state = 517 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 518 cpupm_state_ops_t *state_ops; 519 cpupm_state_domains_t *state_domain; 520 cpuset_t set; 521 522 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 523 524 if (mach_state == NULL) { 525 return; 526 } 527 528 switch (state) { 529 case CPUPM_P_STATES: 530 state_ops = mach_state->ms_pstate.cma_ops; 531 state_domain = mach_state->ms_pstate.cma_domain; 532 break; 533 case CPUPM_T_STATES: 534 state_ops = mach_state->ms_tstate.cma_ops; 535 state_domain = mach_state->ms_tstate.cma_domain; 536 break; 537 default: 538 break; 539 } 540 541 switch (state_domain->pm_type) { 542 case CPU_ACPI_SW_ANY: 543 /* 544 * A request on any CPU in the domain transitions the domain 545 */ 546 CPUSET_ONLY(set, cp->cpu_id); 547 state_ops->cpus_change(set, level); 548 break; 549 case CPU_ACPI_SW_ALL: 550 /* 551 * All CPUs in the domain must request the transition 552 */ 553 case CPU_ACPI_HW_ALL: 554 /* 555 * P/T-state transitions are coordinated by the hardware 556 * For now, request the transition on all CPUs in the domain, 557 * but looking ahead we can probably be smarter about this. 558 */ 559 mutex_enter(&state_domain->pm_lock); 560 state_ops->cpus_change(state_domain->pm_cpus, level); 561 mutex_exit(&state_domain->pm_lock); 562 break; 563 default: 564 cmn_err(CE_NOTE, "Unknown domain coordination type: %d", 565 state_domain->pm_type); 566 } 567 } 568 569 /* 570 * CPU PM interfaces exposed to the CPU power manager 571 */ 572 /*ARGSUSED*/ 573 id_t 574 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 575 { 576 cpupm_mach_state_t *mach_state = 577 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 578 579 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 580 !cpupm_is_enabled(CPUPM_C_STATES))) { 581 return (CPUPM_NO_DOMAIN); 582 } 583 if (type == CPUPM_DTYPE_ACTIVE) { 584 /* 585 * Return P-State domain for the specified CPU 586 */ 587 if (mach_state->ms_pstate.cma_domain) { 588 return (mach_state->ms_pstate.cma_domain->pm_domain); 589 } 590 } else if (type == CPUPM_DTYPE_IDLE) { 591 /* 592 * Return C-State domain for the specified CPU 593 */ 594 if (mach_state->ms_cstate.cma_domain) { 595 return (mach_state->ms_cstate.cma_domain->pm_domain); 596 } 597 } 598 return (CPUPM_NO_DOMAIN); 599 } 600 601 /*ARGSUSED*/ 602 uint_t 603 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 604 cpupm_state_t *states) 605 { 606 int *speeds; 607 uint_t nspeeds, i; 608 609 /* 610 * Idle domain support unimplemented 611 */ 612 if (type != CPUPM_DTYPE_ACTIVE) { 613 return (0); 614 } 615 nspeeds = cpupm_get_speeds(cp, &speeds); 616 617 /* 618 * If the caller passes NULL for states, just return the 619 * number of states. 620 */ 621 if (states != NULL) { 622 for (i = 0; i < nspeeds; i++) { 623 states[i].cps_speed = speeds[i]; 624 states[i].cps_handle = (cpupm_handle_t)i; 625 } 626 } 627 cpupm_free_speeds(speeds, nspeeds); 628 return (nspeeds); 629 } 630 631 /*ARGSUSED*/ 632 int 633 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 634 { 635 if (!cpupm_is_ready()) 636 return (-1); 637 638 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 639 640 return (0); 641 } 642 643 /*ARGSUSED*/ 644 /* 645 * Note: It is the responsibility of the users of 646 * cpupm_get_speeds() to free the memory allocated 647 * for speeds using cpupm_free_speeds() 648 */ 649 uint_t 650 cpupm_get_speeds(cpu_t *cp, int **speeds) 651 { 652 #ifndef __xpv 653 cpupm_mach_state_t *mach_state = 654 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 655 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 656 #else 657 return (0); 658 #endif 659 } 660 661 /*ARGSUSED*/ 662 void 663 cpupm_free_speeds(int *speeds, uint_t nspeeds) 664 { 665 #ifndef __xpv 666 cpu_acpi_free_speeds(speeds, nspeeds); 667 #endif 668 } 669 670 /* 671 * All CPU instances have been initialized successfully. 672 */ 673 boolean_t 674 cpupm_power_ready(void) 675 { 676 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready()); 677 } 678 679 /* 680 * All CPU instances have been initialized successfully. 681 */ 682 boolean_t 683 cpupm_throttle_ready(void) 684 { 685 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready()); 686 } 687 688 /* 689 * All CPU instances have been initialized successfully. 690 */ 691 boolean_t 692 cpupm_cstate_ready(void) 693 { 694 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready()); 695 } 696 697 void 698 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 699 { 700 cpu_t *cp = ctx; 701 cpupm_mach_state_t *mach_state = 702 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 703 cpupm_notification_t *entry; 704 705 mutex_enter(&mach_state->ms_lock); 706 for (entry = mach_state->ms_handlers; entry != NULL; 707 entry = entry->nq_next) { 708 entry->nq_handler(obj, val, entry->nq_ctx); 709 } 710 mutex_exit(&mach_state->ms_lock); 711 } 712 713 /*ARGSUSED*/ 714 void 715 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 716 { 717 #ifndef __xpv 718 cpupm_mach_state_t *mach_state = 719 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 720 cpupm_notification_t *entry; 721 722 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 723 entry->nq_handler = handler; 724 entry->nq_ctx = ctx; 725 mutex_enter(&mach_state->ms_lock); 726 if (mach_state->ms_handlers == NULL) { 727 entry->nq_next = NULL; 728 mach_state->ms_handlers = entry; 729 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 730 cpupm_notify_handler, cp); 731 732 } else { 733 entry->nq_next = mach_state->ms_handlers; 734 mach_state->ms_handlers = entry; 735 } 736 mutex_exit(&mach_state->ms_lock); 737 #endif 738 } 739 740 /*ARGSUSED*/ 741 static void 742 cpupm_free_notify_handlers(cpu_t *cp) 743 { 744 #ifndef __xpv 745 cpupm_mach_state_t *mach_state = 746 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 747 cpupm_notification_t *entry; 748 cpupm_notification_t *next; 749 750 mutex_enter(&mach_state->ms_lock); 751 if (mach_state->ms_handlers == NULL) { 752 mutex_exit(&mach_state->ms_lock); 753 return; 754 } 755 if (mach_state->ms_acpi_handle != NULL) { 756 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 757 cpupm_notify_handler); 758 } 759 entry = mach_state->ms_handlers; 760 while (entry != NULL) { 761 next = entry->nq_next; 762 kmem_free(entry, sizeof (cpupm_notification_t)); 763 entry = next; 764 } 765 mach_state->ms_handlers = NULL; 766 mutex_exit(&mach_state->ms_lock); 767 #endif 768 } 769 770 /* 771 * Get the current max speed from the ACPI _PPC object 772 */ 773 /*ARGSUSED*/ 774 int 775 cpupm_get_top_speed(cpu_t *cp) 776 { 777 #ifndef __xpv 778 cpupm_mach_state_t *mach_state; 779 cpu_acpi_handle_t handle; 780 int plat_level; 781 uint_t nspeeds; 782 int max_level; 783 784 mach_state = 785 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 786 handle = mach_state->ms_acpi_handle; 787 788 cpu_acpi_cache_ppc(handle); 789 plat_level = CPU_ACPI_PPC(handle); 790 791 nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 792 793 max_level = nspeeds - 1; 794 if ((plat_level < 0) || (plat_level > max_level)) { 795 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 796 "_PPC out of range %d", cp->cpu_id, plat_level); 797 plat_level = 0; 798 } 799 800 return (plat_level); 801 #else 802 return (0); 803 #endif 804 } 805 806 /* 807 * This notification handler is called whenever the ACPI _PPC 808 * object changes. The _PPC is a sort of governor on power levels. 809 * It sets an upper threshold on which, _PSS defined, power levels 810 * are usuable. The _PPC value is dynamic and may change as properties 811 * (i.e., thermal or AC source) of the system change. 812 */ 813 814 static void 815 cpupm_power_manage_notifications(void *ctx) 816 { 817 cpu_t *cp = ctx; 818 int top_speed; 819 820 top_speed = cpupm_get_top_speed(cp); 821 cpupm_redefine_max_activepwr_state(cp, top_speed); 822 } 823 824 /* ARGSUSED */ 825 static void 826 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 827 { 828 #ifndef __xpv 829 830 cpu_t *cp = ctx; 831 cpupm_mach_state_t *mach_state = 832 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 833 834 if (mach_state == NULL) 835 return; 836 837 /* 838 * Currently, we handle _TPC,_CST and _PPC change notifications. 839 */ 840 if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 841 mach_state->ms_caps & CPUPM_T_STATES) { 842 cpupm_throttle_manage_notification(ctx); 843 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 844 mach_state->ms_caps & CPUPM_C_STATES) { 845 cpuidle_manage_cstates(ctx); 846 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 847 mach_state->ms_caps & CPUPM_P_STATES) { 848 cpupm_power_manage_notifications(ctx); 849 } 850 #endif 851 } 852 853 /* 854 * Update cpupm cstate data each time CPU exits idle. 855 */ 856 void 857 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end) 858 { 859 cs_data->cs_idle_exit = end; 860 } 861 862 /* 863 * Determine next cstate based on cpupm data. 864 * Update cpupm cstate data each time CPU goes idle. 865 * Do as much as possible in the idle state bookkeeping function because the 866 * performance impact while idle is minimal compared to in the wakeup function 867 * when there is real work to do. 868 */ 869 uint32_t 870 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates, 871 uint32_t cs_count, hrtime_t start) 872 { 873 hrtime_t duration; 874 hrtime_t ave_interval; 875 hrtime_t ave_idle_time; 876 uint32_t i, smpl_cnt; 877 878 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter; 879 scalehrtime(&duration); 880 cs_data->cs_idle += duration; 881 cs_data->cs_idle_enter = start; 882 883 smpl_cnt = ++cs_data->cs_cnt; 884 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start; 885 scalehrtime(&cs_data->cs_smpl_len); 886 if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) { 887 cs_data->cs_smpl_idle = cs_data->cs_idle; 888 cs_data->cs_idle = 0; 889 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) / 890 cs_data->cs_smpl_len); 891 892 cs_data->cs_smpl_start = start; 893 cs_data->cs_cnt = 0; 894 895 /* 896 * Strand level C-state policy 897 * The cpu_acpi_cstate_t *cstates array is not required to 898 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3. 899 * There are cs_count entries in the cstates array. 900 * cs_data->cs_next_cstate contains the index of the next 901 * C-state this CPU should enter. 902 */ 903 ASSERT(cstates[0].cs_type == CPU_ACPI_C1); 904 905 /* 906 * Will CPU be idle long enough to save power? 907 */ 908 ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000; 909 for (i = 1; i < cs_count; ++i) { 910 if (ave_idle_time < (cstates[i].cs_latency * 911 cpupm_cs_idle_save_tunable)) { 912 cs_count = i; 913 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 914 CPU, int, i); 915 } 916 } 917 918 /* 919 * Wakeup often (even when non-idle time is very short)? 920 * Some producer/consumer type loads fall into this category. 921 */ 922 ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000; 923 for (i = 1; i < cs_count; ++i) { 924 if (ave_interval <= (cstates[i].cs_latency * 925 cpupm_cs_idle_cost_tunable)) { 926 cs_count = i; 927 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 928 CPU, int, (CPU_MAX_CSTATES + i)); 929 } 930 } 931 932 /* 933 * Idle percent 934 */ 935 for (i = 1; i < cs_count; ++i) { 936 switch (cstates[i].cs_type) { 937 case CPU_ACPI_C2: 938 if (cs_data->cs_smpl_idle_pct < 939 cpupm_C2_idle_pct_tunable) { 940 cs_count = i; 941 DTRACE_PROBE2(cpupm__next__cstate, 942 cpu_t *, CPU, int, 943 ((2 * CPU_MAX_CSTATES) + i)); 944 } 945 break; 946 947 case CPU_ACPI_C3: 948 if (cs_data->cs_smpl_idle_pct < 949 cpupm_C3_idle_pct_tunable) { 950 cs_count = i; 951 DTRACE_PROBE2(cpupm__next__cstate, 952 cpu_t *, CPU, int, 953 ((2 * CPU_MAX_CSTATES) + i)); 954 } 955 break; 956 } 957 } 958 959 cs_data->cs_next_cstate = cs_count - 1; 960 } 961 962 return (cs_data->cs_next_cstate); 963 } 964