1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/cpu_pm.h> 31 #include <sys/x86_archext.h> 32 #include <sys/sdt.h> 33 #include <sys/spl.h> 34 #include <sys/machsystm.h> 35 #include <sys/archsystm.h> 36 #include <sys/hpet.h> 37 #include <sys/acpi/acpi.h> 38 #include <sys/acpica.h> 39 #include <sys/cpupm.h> 40 #include <sys/cpu_idle.h> 41 #include <sys/cpu_acpi.h> 42 #include <sys/cpupm_throttle.h> 43 #include <sys/dtrace.h> 44 #include <sys/note.h> 45 46 /* 47 * This callback is used to build the PPM CPU domains once 48 * a CPU device has been started. The callback is initialized 49 * by the PPM driver to point to a routine that will build the 50 * domains. 51 */ 52 void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *); 53 54 /* 55 * This callback is used to remove CPU from the PPM CPU domains 56 * when the cpu driver is detached. The callback is initialized 57 * by the PPM driver to point to a routine that will remove CPU 58 * from the domains. 
59 */ 60 void (*cpupm_ppm_free_pstate_domains)(cpu_t *); 61 62 /* 63 * This callback is used to redefine the topspeed for a CPU device. 64 * Since all CPUs in a domain should have identical properties, this 65 * callback is initialized by the PPM driver to point to a routine 66 * that will redefine the topspeed for all devices in a CPU domain. 67 * This callback is exercised whenever an ACPI _PPC change notification 68 * is received by the CPU driver. 69 */ 70 void (*cpupm_redefine_topspeed)(void *); 71 72 /* 73 * This callback is used by the PPM driver to call into the CPU driver 74 * to set a new topspeed for a CPU. 75 */ 76 void (*cpupm_set_topspeed_callb)(void *, int); 77 78 /* 79 * This callback is used by the PPM driver to call into the CPU driver 80 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value). 81 */ 82 int (*cpupm_get_topspeed_callb)(void *); 83 84 static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *); 85 static void cpupm_free_notify_handlers(cpu_t *); 86 static void cpupm_power_manage_notifications(void *); 87 88 /* 89 * Until proven otherwise, all power states are manageable. 90 */ 91 static uint32_t cpupm_enabled = CPUPM_ALL_STATES; 92 93 cpupm_state_domains_t *cpupm_pstate_domains = NULL; 94 cpupm_state_domains_t *cpupm_tstate_domains = NULL; 95 cpupm_state_domains_t *cpupm_cstate_domains = NULL; 96 97 /* 98 * c-state tunables 99 * 100 * cpupm_cs_sample_interval is the length of time we wait before 101 * recalculating c-state statistics. When a CPU goes idle it checks 102 * to see if it has been longer than cpupm_cs_sample_interval since it last 103 * calculated which C-state to go to. 104 * 105 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle 106 * divided by time spent in the idle state transitions. 107 * A value of 10 means the CPU will not spend more than 1/10 of its time 108 * in idle latency. The worst case performance will be 90% of non Deep C-state 109 * kernel. 
110 * 111 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state 112 * before it is worth going there. Expressed as a multiple of latency. 113 */ 114 uint32_t cpupm_cs_sample_interval = 100*1000*1000; /* 100 milliseconds */ 115 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 116 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 117 uint16_t cpupm_C2_idle_pct_tunable = 70; 118 uint16_t cpupm_C3_idle_pct_tunable = 80; 119 120 #ifndef __xpv 121 extern boolean_t cpupm_intel_init(cpu_t *); 122 extern boolean_t cpupm_amd_init(cpu_t *); 123 124 typedef struct cpupm_vendor { 125 boolean_t (*cpuv_init)(cpu_t *); 126 } cpupm_vendor_t; 127 128 /* 129 * Table of supported vendors. 130 */ 131 static cpupm_vendor_t cpupm_vendors[] = { 132 cpupm_intel_init, 133 cpupm_amd_init, 134 NULL 135 }; 136 #endif 137 138 /* 139 * Initialize the machine. 140 * See if a module exists for managing power for this CPU. 141 */ 142 /*ARGSUSED*/ 143 void 144 cpupm_init(cpu_t *cp) 145 { 146 #ifndef __xpv 147 cpupm_vendor_t *vendors; 148 cpupm_mach_state_t *mach_state; 149 struct machcpu *mcpu = &(cp->cpu_m); 150 static boolean_t first = B_TRUE; 151 int *speeds; 152 uint_t nspeeds; 153 int ret; 154 155 mach_state = cp->cpu_m.mcpu_pm_mach_state = 156 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 157 mach_state->ms_caps = CPUPM_NO_STATES; 158 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 159 160 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 161 if (mach_state->ms_acpi_handle == NULL) { 162 cpupm_fini(cp); 163 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 164 "unable to get ACPI handle", cp->cpu_id); 165 cmn_err(CE_NOTE, "!CPU power management will not function."); 166 CPUPM_DISABLE(); 167 first = B_FALSE; 168 return; 169 } 170 171 /* 172 * Loop through the CPU management module table and see if 173 * any of the modules implement CPU power management 174 * for this CPU. 
175 */ 176 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 177 if (vendors->cpuv_init(cp)) 178 break; 179 } 180 181 /* 182 * Nope, we can't power manage this CPU. 183 */ 184 if (vendors == NULL) { 185 cpupm_fini(cp); 186 CPUPM_DISABLE(); 187 first = B_FALSE; 188 return; 189 } 190 191 /* 192 * If P-state support exists for this system, then initialize it. 193 */ 194 if (mach_state->ms_pstate.cma_ops != NULL) { 195 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 196 if (ret != 0) { 197 mach_state->ms_pstate.cma_ops = NULL; 198 cpupm_disable(CPUPM_P_STATES); 199 } else { 200 nspeeds = cpupm_get_speeds(cp, &speeds); 201 if (nspeeds == 0) { 202 cmn_err(CE_NOTE, "!cpupm_init: processor %d:" 203 " no speeds to manage", cp->cpu_id); 204 } else { 205 cpupm_set_supp_freqs(cp, speeds, nspeeds); 206 cpupm_free_speeds(speeds, nspeeds); 207 mach_state->ms_caps |= CPUPM_P_STATES; 208 } 209 } 210 } 211 212 if (mach_state->ms_tstate.cma_ops != NULL) { 213 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 214 if (ret != 0) { 215 mach_state->ms_tstate.cma_ops = NULL; 216 cpupm_disable(CPUPM_T_STATES); 217 } else { 218 mach_state->ms_caps |= CPUPM_T_STATES; 219 } 220 } 221 222 /* 223 * If C-states support exists for this system, then initialize it. 
224 */ 225 if (mach_state->ms_cstate.cma_ops != NULL) { 226 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 227 if (ret != 0) { 228 mach_state->ms_cstate.cma_ops = NULL; 229 mcpu->max_cstates = CPU_ACPI_C1; 230 cpupm_disable(CPUPM_C_STATES); 231 idle_cpu = non_deep_idle_cpu; 232 disp_enq_thread = non_deep_idle_disp_enq_thread; 233 } else if (cpu_deep_cstates_supported()) { 234 mcpu->max_cstates = cpu_acpi_get_max_cstates( 235 mach_state->ms_acpi_handle); 236 if (mcpu->max_cstates > CPU_ACPI_C1) { 237 (void) cstate_timer_callback( 238 CST_EVENT_MULTIPLE_CSTATES); 239 CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 240 mcpu->mcpu_idle_type = CPU_ACPI_C1; 241 disp_enq_thread = cstate_wakeup; 242 } else { 243 (void) cstate_timer_callback( 244 CST_EVENT_ONE_CSTATE); 245 } 246 mach_state->ms_caps |= CPUPM_C_STATES; 247 } else { 248 mcpu->max_cstates = CPU_ACPI_C1; 249 idle_cpu = non_deep_idle_cpu; 250 disp_enq_thread = non_deep_idle_disp_enq_thread; 251 } 252 } 253 254 255 if (mach_state->ms_caps == CPUPM_NO_STATES) { 256 cpupm_fini(cp); 257 CPUPM_DISABLE(); 258 first = B_FALSE; 259 return; 260 } 261 262 if ((mach_state->ms_caps & CPUPM_T_STATES) || 263 (mach_state->ms_caps & CPUPM_P_STATES) || 264 (mach_state->ms_caps & CPUPM_C_STATES)) { 265 if (first) { 266 acpica_write_cpupm_capabilities( 267 mach_state->ms_caps & CPUPM_P_STATES, 268 mach_state->ms_caps & CPUPM_C_STATES); 269 } 270 cpupm_throttle_manage_notification(cp); 271 cpuidle_manage_cstates(cp); 272 cpupm_power_manage_notifications(cp); 273 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 274 } 275 first = B_FALSE; 276 #endif 277 } 278 279 /* 280 * Free any resources allocated during cpupm initialization or cpupm start. 
281 */ 282 /*ARGSUSED*/ 283 void 284 cpupm_free(cpu_t *cp, boolean_t cpupm_stop) 285 { 286 #ifndef __xpv 287 cpupm_mach_state_t *mach_state = 288 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 289 290 if (mach_state == NULL) 291 return; 292 293 if (mach_state->ms_pstate.cma_ops != NULL) { 294 if (cpupm_stop) 295 mach_state->ms_pstate.cma_ops->cpus_stop(cp); 296 else 297 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 298 mach_state->ms_pstate.cma_ops = NULL; 299 } 300 301 if (mach_state->ms_tstate.cma_ops != NULL) { 302 if (cpupm_stop) 303 mach_state->ms_tstate.cma_ops->cpus_stop(cp); 304 else 305 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 306 mach_state->ms_tstate.cma_ops = NULL; 307 } 308 309 if (mach_state->ms_cstate.cma_ops != NULL) { 310 if (cpupm_stop) 311 mach_state->ms_cstate.cma_ops->cpus_stop(cp); 312 else 313 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 314 315 mach_state->ms_cstate.cma_ops = NULL; 316 } 317 318 cpupm_free_notify_handlers(cp); 319 320 if (mach_state->ms_acpi_handle != NULL) { 321 cpu_acpi_fini(mach_state->ms_acpi_handle); 322 mach_state->ms_acpi_handle = NULL; 323 } 324 325 mutex_destroy(&mach_state->ms_lock); 326 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 327 cp->cpu_m.mcpu_pm_mach_state = NULL; 328 #endif 329 } 330 331 void 332 cpupm_fini(cpu_t *cp) 333 { 334 /* 335 * call (*cpus_fini)() ops to release the cpupm resource 336 * in the P/C/T-state driver 337 */ 338 cpupm_free(cp, B_FALSE); 339 } 340 341 void 342 cpupm_start(cpu_t *cp) 343 { 344 cpupm_init(cp); 345 } 346 347 void 348 cpupm_stop(cpu_t *cp) 349 { 350 /* 351 * call (*cpus_stop)() ops to reclaim the cpupm resource 352 * in the P/C/T-state driver 353 */ 354 cpupm_free(cp, B_TRUE); 355 } 356 357 /* 358 * If A CPU has started and at least one power state is manageable, 359 * then the CPU is ready for power management. 
360 */ 361 boolean_t 362 cpupm_is_ready(cpu_t *cp) 363 { 364 #ifndef __xpv 365 cpupm_mach_state_t *mach_state = 366 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 367 uint32_t cpupm_caps = mach_state->ms_caps; 368 369 if (cpupm_enabled == CPUPM_NO_STATES) 370 return (B_FALSE); 371 372 if ((cpupm_caps & CPUPM_T_STATES) || 373 (cpupm_caps & CPUPM_P_STATES) || 374 (cpupm_caps & CPUPM_C_STATES)) 375 376 return (B_TRUE); 377 return (B_FALSE); 378 #else 379 _NOTE(ARGUNUSED(cp)); 380 return (B_FALSE); 381 #endif 382 } 383 384 boolean_t 385 cpupm_is_enabled(uint32_t state) 386 { 387 return ((cpupm_enabled & state) == state); 388 } 389 390 /* 391 * By default, all states are enabled. 392 */ 393 void 394 cpupm_disable(uint32_t state) 395 { 396 397 if (state & CPUPM_P_STATES) { 398 cpupm_free_domains(&cpupm_pstate_domains); 399 } 400 if (state & CPUPM_T_STATES) { 401 cpupm_free_domains(&cpupm_tstate_domains); 402 } 403 if (state & CPUPM_C_STATES) { 404 cpupm_free_domains(&cpupm_cstate_domains); 405 } 406 cpupm_enabled &= ~state; 407 } 408 409 /* 410 * Allocate power domains for C,P and T States 411 */ 412 void 413 cpupm_alloc_domains(cpu_t *cp, int state) 414 { 415 cpupm_mach_state_t *mach_state = 416 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 417 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 418 cpupm_state_domains_t **dom_ptr; 419 cpupm_state_domains_t *dptr; 420 cpupm_state_domains_t **mach_dom_state_ptr; 421 uint32_t domain; 422 uint32_t type; 423 424 switch (state) { 425 case CPUPM_P_STATES: 426 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 427 domain = CPU_ACPI_PSD(handle).sd_domain; 428 type = CPU_ACPI_PSD(handle).sd_type; 429 } else { 430 mutex_enter(&cpu_lock); 431 domain = cpuid_get_chipid(cp); 432 mutex_exit(&cpu_lock); 433 type = CPU_ACPI_HW_ALL; 434 } 435 dom_ptr = &cpupm_pstate_domains; 436 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 437 break; 438 case CPUPM_T_STATES: 439 if (CPU_ACPI_IS_OBJ_CACHED(handle, 
CPU_ACPI_TSD_CACHED)) { 440 domain = CPU_ACPI_TSD(handle).sd_domain; 441 type = CPU_ACPI_TSD(handle).sd_type; 442 } else { 443 mutex_enter(&cpu_lock); 444 domain = cpuid_get_chipid(cp); 445 mutex_exit(&cpu_lock); 446 type = CPU_ACPI_HW_ALL; 447 } 448 dom_ptr = &cpupm_tstate_domains; 449 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 450 break; 451 case CPUPM_C_STATES: 452 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 453 domain = CPU_ACPI_CSD(handle).sd_domain; 454 type = CPU_ACPI_CSD(handle).sd_type; 455 } else { 456 mutex_enter(&cpu_lock); 457 domain = cpuid_get_coreid(cp); 458 mutex_exit(&cpu_lock); 459 type = CPU_ACPI_HW_ALL; 460 } 461 dom_ptr = &cpupm_cstate_domains; 462 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 463 break; 464 default: 465 return; 466 } 467 468 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 469 if (dptr->pm_domain == domain) 470 break; 471 } 472 473 /* new domain is created and linked at the head */ 474 if (dptr == NULL) { 475 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 476 dptr->pm_domain = domain; 477 dptr->pm_type = type; 478 dptr->pm_next = *dom_ptr; 479 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 480 (void *)ipltospl(DISP_LEVEL)); 481 CPUSET_ZERO(dptr->pm_cpus); 482 *dom_ptr = dptr; 483 } 484 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 485 *mach_dom_state_ptr = dptr; 486 } 487 488 /* 489 * Free C, P or T state power domains 490 */ 491 void 492 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 493 { 494 cpupm_state_domains_t *this_domain, *next_domain; 495 496 this_domain = *dom_ptr; 497 while (this_domain != NULL) { 498 next_domain = this_domain->pm_next; 499 mutex_destroy(&this_domain->pm_lock); 500 kmem_free((void *)this_domain, 501 sizeof (cpupm_state_domains_t)); 502 this_domain = next_domain; 503 } 504 *dom_ptr = NULL; 505 } 506 507 /* 508 * Remove CPU from C, P or T state power domains 509 */ 510 void 511 cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t 
**dom_ptr) 512 { 513 cpupm_mach_state_t *mach_state = 514 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 515 cpupm_state_domains_t *dptr; 516 uint32_t pm_domain; 517 ulong_t iflag; 518 519 ASSERT(mach_state); 520 521 switch (state) { 522 case CPUPM_P_STATES: 523 pm_domain = mach_state->ms_pstate.cma_domain->pm_domain; 524 break; 525 case CPUPM_T_STATES: 526 pm_domain = mach_state->ms_tstate.cma_domain->pm_domain; 527 break; 528 case CPUPM_C_STATES: 529 pm_domain = mach_state->ms_cstate.cma_domain->pm_domain; 530 break; 531 default: 532 return; 533 } 534 535 /* 536 * Find the CPU C, P or T state power domain 537 */ 538 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 539 if (dptr->pm_domain == pm_domain) 540 break; 541 } 542 543 /* 544 * return if no matched domain found 545 */ 546 if (dptr == NULL) 547 return; 548 549 /* 550 * We found one matched power domain, remove CPU from its cpuset. 551 * Interrupt is disabled here to avoid the race conditions between 552 * event change notification and cpu remove. 
553 */ 554 iflag = intr_clear(); 555 mutex_enter(&dptr->pm_lock); 556 if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id)) 557 CPUSET_DEL(dptr->pm_cpus, cp->cpu_id); 558 mutex_exit(&dptr->pm_lock); 559 intr_restore(iflag); 560 } 561 562 void 563 cpupm_alloc_ms_cstate(cpu_t *cp) 564 { 565 cpupm_mach_state_t *mach_state; 566 cpupm_mach_acpi_state_t *ms_cstate; 567 568 mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 569 ms_cstate = &mach_state->ms_cstate; 570 ASSERT(ms_cstate->cma_state.cstate == NULL); 571 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 572 KM_SLEEP); 573 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 574 } 575 576 void 577 cpupm_free_ms_cstate(cpu_t *cp) 578 { 579 cpupm_mach_state_t *mach_state = 580 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 581 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 582 583 if (ms_cstate->cma_state.cstate != NULL) { 584 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 585 ms_cstate->cma_state.cstate = NULL; 586 } 587 } 588 589 void 590 cpupm_state_change(cpu_t *cp, int level, int state) 591 { 592 cpupm_mach_state_t *mach_state = 593 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 594 cpupm_state_ops_t *state_ops; 595 cpupm_state_domains_t *state_domain; 596 cpuset_t set; 597 598 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 599 600 if (mach_state == NULL) { 601 return; 602 } 603 604 switch (state) { 605 case CPUPM_P_STATES: 606 state_ops = mach_state->ms_pstate.cma_ops; 607 state_domain = mach_state->ms_pstate.cma_domain; 608 break; 609 case CPUPM_T_STATES: 610 state_ops = mach_state->ms_tstate.cma_ops; 611 state_domain = mach_state->ms_tstate.cma_domain; 612 break; 613 default: 614 break; 615 } 616 617 switch (state_domain->pm_type) { 618 case CPU_ACPI_SW_ANY: 619 /* 620 * A request on any CPU in the domain transitions the domain 621 */ 622 CPUSET_ONLY(set, cp->cpu_id); 623 state_ops->cpus_change(set, level); 624 break; 
625 case CPU_ACPI_SW_ALL: 626 /* 627 * All CPUs in the domain must request the transition 628 */ 629 case CPU_ACPI_HW_ALL: 630 /* 631 * P/T-state transitions are coordinated by the hardware 632 * For now, request the transition on all CPUs in the domain, 633 * but looking ahead we can probably be smarter about this. 634 */ 635 mutex_enter(&state_domain->pm_lock); 636 state_ops->cpus_change(state_domain->pm_cpus, level); 637 mutex_exit(&state_domain->pm_lock); 638 break; 639 default: 640 cmn_err(CE_NOTE, "Unknown domain coordination type: %d", 641 state_domain->pm_type); 642 } 643 } 644 645 /* 646 * CPU PM interfaces exposed to the CPU power manager 647 */ 648 /*ARGSUSED*/ 649 id_t 650 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 651 { 652 cpupm_mach_state_t *mach_state = 653 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 654 655 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 656 !cpupm_is_enabled(CPUPM_C_STATES))) { 657 return (CPUPM_NO_DOMAIN); 658 } 659 if (type == CPUPM_DTYPE_ACTIVE) { 660 /* 661 * Return P-State domain for the specified CPU 662 */ 663 if (mach_state->ms_pstate.cma_domain) { 664 return (mach_state->ms_pstate.cma_domain->pm_domain); 665 } 666 } else if (type == CPUPM_DTYPE_IDLE) { 667 /* 668 * Return C-State domain for the specified CPU 669 */ 670 if (mach_state->ms_cstate.cma_domain) { 671 return (mach_state->ms_cstate.cma_domain->pm_domain); 672 } 673 } 674 return (CPUPM_NO_DOMAIN); 675 } 676 677 /*ARGSUSED*/ 678 uint_t 679 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 680 cpupm_state_t *states) 681 { 682 int *speeds; 683 uint_t nspeeds, i; 684 685 /* 686 * Idle domain support unimplemented 687 */ 688 if (type != CPUPM_DTYPE_ACTIVE) { 689 return (0); 690 } 691 nspeeds = cpupm_get_speeds(cp, &speeds); 692 693 /* 694 * If the caller passes NULL for states, just return the 695 * number of states. 
696 */ 697 if (states != NULL) { 698 for (i = 0; i < nspeeds; i++) { 699 states[i].cps_speed = speeds[i]; 700 states[i].cps_handle = (cpupm_handle_t)i; 701 } 702 } 703 cpupm_free_speeds(speeds, nspeeds); 704 return (nspeeds); 705 } 706 707 /*ARGSUSED*/ 708 int 709 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 710 { 711 if (!cpupm_is_ready(cp)) 712 return (-1); 713 714 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 715 716 return (0); 717 } 718 719 /*ARGSUSED*/ 720 /* 721 * Note: It is the responsibility of the users of 722 * cpupm_get_speeds() to free the memory allocated 723 * for speeds using cpupm_free_speeds() 724 */ 725 uint_t 726 cpupm_get_speeds(cpu_t *cp, int **speeds) 727 { 728 #ifndef __xpv 729 cpupm_mach_state_t *mach_state = 730 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 731 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 732 #else 733 return (0); 734 #endif 735 } 736 737 /*ARGSUSED*/ 738 void 739 cpupm_free_speeds(int *speeds, uint_t nspeeds) 740 { 741 #ifndef __xpv 742 cpu_acpi_free_speeds(speeds, nspeeds); 743 #endif 744 } 745 746 /* 747 * All CPU instances have been initialized successfully. 748 */ 749 boolean_t 750 cpupm_power_ready(cpu_t *cp) 751 { 752 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp)); 753 } 754 755 /* 756 * All CPU instances have been initialized successfully. 757 */ 758 boolean_t 759 cpupm_throttle_ready(cpu_t *cp) 760 { 761 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp)); 762 } 763 764 /* 765 * All CPU instances have been initialized successfully. 
766 */ 767 boolean_t 768 cpupm_cstate_ready(cpu_t *cp) 769 { 770 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp)); 771 } 772 773 void 774 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 775 { 776 cpu_t *cp = ctx; 777 cpupm_mach_state_t *mach_state = 778 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 779 cpupm_notification_t *entry; 780 781 mutex_enter(&mach_state->ms_lock); 782 for (entry = mach_state->ms_handlers; entry != NULL; 783 entry = entry->nq_next) { 784 entry->nq_handler(obj, val, entry->nq_ctx); 785 } 786 mutex_exit(&mach_state->ms_lock); 787 } 788 789 /*ARGSUSED*/ 790 void 791 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 792 { 793 #ifndef __xpv 794 cpupm_mach_state_t *mach_state = 795 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 796 cpupm_notification_t *entry; 797 798 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 799 entry->nq_handler = handler; 800 entry->nq_ctx = ctx; 801 mutex_enter(&mach_state->ms_lock); 802 if (mach_state->ms_handlers == NULL) { 803 entry->nq_next = NULL; 804 mach_state->ms_handlers = entry; 805 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 806 cpupm_notify_handler, cp); 807 808 } else { 809 entry->nq_next = mach_state->ms_handlers; 810 mach_state->ms_handlers = entry; 811 } 812 mutex_exit(&mach_state->ms_lock); 813 #endif 814 } 815 816 /*ARGSUSED*/ 817 static void 818 cpupm_free_notify_handlers(cpu_t *cp) 819 { 820 #ifndef __xpv 821 cpupm_mach_state_t *mach_state = 822 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 823 cpupm_notification_t *entry; 824 cpupm_notification_t *next; 825 826 mutex_enter(&mach_state->ms_lock); 827 if (mach_state->ms_handlers == NULL) { 828 mutex_exit(&mach_state->ms_lock); 829 return; 830 } 831 if (mach_state->ms_acpi_handle != NULL) { 832 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 833 cpupm_notify_handler); 834 } 835 entry = mach_state->ms_handlers; 836 while (entry != NULL) { 
837 next = entry->nq_next; 838 kmem_free(entry, sizeof (cpupm_notification_t)); 839 entry = next; 840 } 841 mach_state->ms_handlers = NULL; 842 mutex_exit(&mach_state->ms_lock); 843 #endif 844 } 845 846 /* 847 * Get the current max speed from the ACPI _PPC object 848 */ 849 /*ARGSUSED*/ 850 int 851 cpupm_get_top_speed(cpu_t *cp) 852 { 853 #ifndef __xpv 854 cpupm_mach_state_t *mach_state; 855 cpu_acpi_handle_t handle; 856 int plat_level; 857 uint_t nspeeds; 858 int max_level; 859 860 mach_state = 861 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 862 handle = mach_state->ms_acpi_handle; 863 864 cpu_acpi_cache_ppc(handle); 865 plat_level = CPU_ACPI_PPC(handle); 866 867 nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 868 869 max_level = nspeeds - 1; 870 if ((plat_level < 0) || (plat_level > max_level)) { 871 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 872 "_PPC out of range %d", cp->cpu_id, plat_level); 873 plat_level = 0; 874 } 875 876 return (plat_level); 877 #else 878 return (0); 879 #endif 880 } 881 882 /* 883 * This notification handler is called whenever the ACPI _PPC 884 * object changes. The _PPC is a sort of governor on power levels. 885 * It sets an upper threshold on which, _PSS defined, power levels 886 * are usuable. The _PPC value is dynamic and may change as properties 887 * (i.e., thermal or AC source) of the system change. 888 */ 889 890 static void 891 cpupm_power_manage_notifications(void *ctx) 892 { 893 cpu_t *cp = ctx; 894 int top_speed; 895 896 top_speed = cpupm_get_top_speed(cp); 897 cpupm_redefine_max_activepwr_state(cp, top_speed); 898 } 899 900 /* ARGSUSED */ 901 static void 902 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 903 { 904 #ifndef __xpv 905 906 cpu_t *cp = ctx; 907 cpupm_mach_state_t *mach_state = 908 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 909 910 if (mach_state == NULL) 911 return; 912 913 /* 914 * Currently, we handle _TPC,_CST and _PPC change notifications. 
915 */ 916 if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 917 mach_state->ms_caps & CPUPM_T_STATES) { 918 cpupm_throttle_manage_notification(ctx); 919 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 920 mach_state->ms_caps & CPUPM_C_STATES) { 921 cpuidle_manage_cstates(ctx); 922 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 923 mach_state->ms_caps & CPUPM_P_STATES) { 924 cpupm_power_manage_notifications(ctx); 925 } 926 #endif 927 } 928 929 /* 930 * Update cpupm cstate data each time CPU exits idle. 931 */ 932 void 933 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end) 934 { 935 cs_data->cs_idle_exit = end; 936 } 937 938 /* 939 * Determine next cstate based on cpupm data. 940 * Update cpupm cstate data each time CPU goes idle. 941 * Do as much as possible in the idle state bookkeeping function because the 942 * performance impact while idle is minimal compared to in the wakeup function 943 * when there is real work to do. 944 */ 945 uint32_t 946 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates, 947 uint32_t cs_count, hrtime_t start) 948 { 949 hrtime_t duration; 950 hrtime_t ave_interval; 951 hrtime_t ave_idle_time; 952 uint32_t i, smpl_cnt; 953 954 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter; 955 scalehrtime(&duration); 956 cs_data->cs_idle += duration; 957 cs_data->cs_idle_enter = start; 958 959 smpl_cnt = ++cs_data->cs_cnt; 960 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start; 961 scalehrtime(&cs_data->cs_smpl_len); 962 if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) { 963 cs_data->cs_smpl_idle = cs_data->cs_idle; 964 cs_data->cs_idle = 0; 965 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) / 966 cs_data->cs_smpl_len); 967 968 cs_data->cs_smpl_start = start; 969 cs_data->cs_cnt = 0; 970 971 /* 972 * Strand level C-state policy 973 * The cpu_acpi_cstate_t *cstates array is not required to 974 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3. 
975 * There are cs_count entries in the cstates array. 976 * cs_data->cs_next_cstate contains the index of the next 977 * C-state this CPU should enter. 978 */ 979 ASSERT(cstates[0].cs_type == CPU_ACPI_C1); 980 981 /* 982 * Will CPU be idle long enough to save power? 983 */ 984 ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000; 985 for (i = 1; i < cs_count; ++i) { 986 if (ave_idle_time < (cstates[i].cs_latency * 987 cpupm_cs_idle_save_tunable)) { 988 cs_count = i; 989 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 990 CPU, int, i); 991 } 992 } 993 994 /* 995 * Wakeup often (even when non-idle time is very short)? 996 * Some producer/consumer type loads fall into this category. 997 */ 998 ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000; 999 for (i = 1; i < cs_count; ++i) { 1000 if (ave_interval <= (cstates[i].cs_latency * 1001 cpupm_cs_idle_cost_tunable)) { 1002 cs_count = i; 1003 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 1004 CPU, int, (CPU_MAX_CSTATES + i)); 1005 } 1006 } 1007 1008 /* 1009 * Idle percent 1010 */ 1011 for (i = 1; i < cs_count; ++i) { 1012 switch (cstates[i].cs_type) { 1013 case CPU_ACPI_C2: 1014 if (cs_data->cs_smpl_idle_pct < 1015 cpupm_C2_idle_pct_tunable) { 1016 cs_count = i; 1017 DTRACE_PROBE2(cpupm__next__cstate, 1018 cpu_t *, CPU, int, 1019 ((2 * CPU_MAX_CSTATES) + i)); 1020 } 1021 break; 1022 1023 case CPU_ACPI_C3: 1024 if (cs_data->cs_smpl_idle_pct < 1025 cpupm_C3_idle_pct_tunable) { 1026 cs_count = i; 1027 DTRACE_PROBE2(cpupm__next__cstate, 1028 cpu_t *, CPU, int, 1029 ((2 * CPU_MAX_CSTATES) + i)); 1030 } 1031 break; 1032 } 1033 } 1034 1035 cs_data->cs_next_cstate = cs_count - 1; 1036 } 1037 1038 return (cs_data->cs_next_cstate); 1039 } 1040