/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/cpu_pm.h>
#include <sys/x86_archext.h>
#include <sys/sdt.h>
#include <sys/spl.h>
#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/hpet.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpupm.h>
#include <sys/cpu_idle.h>
#include <sys/cpu_acpi.h>
#include <sys/cpupm_throttle.h>
#include <sys/dtrace.h>
#include <sys/note.h>

/*
 * This callback is used to build the PPM CPU domains once
 * a CPU device has been started. The callback is initialized
 * by the PPM driver to point to a routine that will build the
 * domains.
 */
void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *);

/*
 * This callback is used to remove a CPU from the PPM CPU domains
 * when the cpu driver is detached. The callback is initialized
 * by the PPM driver to point to a routine that will remove the CPU
 * from the domains.
 */
void (*cpupm_ppm_free_pstate_domains)(cpu_t *);

/*
 * This callback is used to redefine the topspeed for a CPU device.
 * Since all CPUs in a domain should have identical properties, this
 * callback is initialized by the PPM driver to point to a routine
 * that will redefine the topspeed for all devices in a CPU domain.
 * This callback is exercised whenever an ACPI _PPC change notification
 * is received by the CPU driver.
 */
void (*cpupm_redefine_topspeed)(void *);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to set a new topspeed for a CPU.
 */
void (*cpupm_set_topspeed_callb)(void *, int);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 */
int (*cpupm_get_topspeed_callb)(void *);

static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
static void cpupm_free_notify_handlers(cpu_t *);

/*
 * Until proven otherwise, all power states are manageable.
 */
static uint32_t cpupm_enabled = CPUPM_ALL_STATES;

cpupm_state_domains_t *cpupm_pstate_domains = NULL;
cpupm_state_domains_t *cpupm_tstate_domains = NULL;
cpupm_state_domains_t *cpupm_cstate_domains = NULL;

/*
 * c-state tunables
 *
 * cpupm_cs_sample_interval is the length of time we wait before
 * recalculating c-state statistics.  When a CPU goes idle it checks
 * whether it has been longer than cpupm_cs_sample_interval since it last
 * calculated which C-state to go to.
 *
 * cpupm_cs_idle_cost_tunable is the ratio of the time the CPU spends
 * executing or idle to the time spent in idle state transitions.
 * A value of 10 means the CPU will not spend more than 1/10 of its time
 * in idle latency.  The worst-case performance will be 90% of a kernel
 * that does not use deep C-states.
 *
 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
 * before it is worth going there.  Expressed as a multiple of latency.
 */
uint32_t cpupm_cs_sample_interval = 100*1000*1000;	/* 100 milliseconds */
uint32_t cpupm_cs_idle_cost_tunable = 10;	/* work time / latency cost */
uint32_t cpupm_cs_idle_save_tunable = 2;	/* idle power savings */
uint16_t cpupm_C2_idle_pct_tunable = 70;
uint16_t cpupm_C3_idle_pct_tunable = 80;
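
/*
 * Worked example (numbers are illustrative): with the defaults above, a
 * C-state whose ACPI-reported worst-case entry/exit latency is 100
 * microseconds is only selected by cpupm_next_cstate() when, over the
 * last sample interval, the CPU's average idle stretch was at least
 * 2 * 100us (cpupm_cs_idle_save_tunable) and the CPU woke up on average
 * no more often than once every 10 * 100us = 1ms
 * (cpupm_cs_idle_cost_tunable).  In addition, the CPU must have been
 * idle for at least 70% of the sample to use C2 and 80% to use C3.
 */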

#ifndef __xpv
extern boolean_t cpupm_intel_init(cpu_t *);
extern boolean_t cpupm_amd_init(cpu_t *);

typedef struct cpupm_vendor {
	boolean_t	(*cpuv_init)(cpu_t *);
} cpupm_vendor_t;

/*
 * Table of supported vendors.
 */
static cpupm_vendor_t cpupm_vendors[] = {
	cpupm_intel_init,
	cpupm_amd_init,
	NULL
};
#endif
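
/*
 * A vendor module's cpuv_init() routine claims a CPU by returning B_TRUE
 * and filling in the per-state management ops (cma_ops) in the machine
 * state; cpupm_init() below probes the table in order and stops at the
 * first module that claims the CPU.  Supporting another vendor would
 * (hypothetically) amount to an extra extern declaration and table entry:
 *
 *	extern boolean_t cpupm_newvendor_init(cpu_t *);
 *
 *	static cpupm_vendor_t cpupm_vendors[] = {
 *		cpupm_intel_init,
 *		cpupm_amd_init,
 *		cpupm_newvendor_init,
 *		NULL
 *	};
 */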

/*
 * Initialize the machine.
 * See if a module exists for managing power for this CPU.
 */
/*ARGSUSED*/
void
cpupm_init(cpu_t *cp)
{
#ifndef __xpv
	cpupm_vendor_t *vendors;
	cpupm_mach_state_t *mach_state;
	struct machcpu *mcpu = &(cp->cpu_m);
	static boolean_t first = B_TRUE;
	int *speeds;
	uint_t nspeeds;
	int ret;

	mach_state = cp->cpu_m.mcpu_pm_mach_state =
	    kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP);
	mach_state->ms_caps = CPUPM_NO_STATES;
	mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL);

	mach_state->ms_acpi_handle = cpu_acpi_init(cp);
	if (mach_state->ms_acpi_handle == NULL) {
		cpupm_fini(cp);
		cmn_err(CE_WARN, "!cpupm_init: processor %d: "
		    "unable to get ACPI handle", cp->cpu_id);
		cmn_err(CE_NOTE, "!CPU power management will not function.");
		CPUPM_DISABLE();
		first = B_FALSE;
		return;
	}

	/*
	 * Loop through the CPU management module table and see if
	 * any of the modules implement CPU power management
	 * for this CPU.
	 */
	for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) {
		if (vendors->cpuv_init(cp))
			break;
	}

	/*
	 * Nope, we can't power manage this CPU.
	 */
	if (vendors->cpuv_init == NULL) {
		cpupm_fini(cp);
		CPUPM_DISABLE();
		first = B_FALSE;
		return;
	}

	/*
	 * If P-state support exists for this system, then initialize it.
	 */
	if (mach_state->ms_pstate.cma_ops != NULL) {
		ret = mach_state->ms_pstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			mach_state->ms_pstate.cma_ops = NULL;
			cpupm_disable(CPUPM_P_STATES);
		} else {
			nspeeds = cpupm_get_speeds(cp, &speeds);
			if (nspeeds == 0) {
				cmn_err(CE_NOTE, "!cpupm_init: processor %d:"
				    " no speeds to manage", cp->cpu_id);
			} else {
				cpupm_set_supp_freqs(cp, speeds, nspeeds);
				cpupm_free_speeds(speeds, nspeeds);
				mach_state->ms_caps |= CPUPM_P_STATES;
			}
		}
	}

	if (mach_state->ms_tstate.cma_ops != NULL) {
		ret = mach_state->ms_tstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			mach_state->ms_tstate.cma_ops = NULL;
			cpupm_disable(CPUPM_T_STATES);
		} else {
			mach_state->ms_caps |= CPUPM_T_STATES;
		}
	}

	/*
	 * If C-state support exists for this system, then initialize it.
	 */
	if (mach_state->ms_cstate.cma_ops != NULL) {
		ret = mach_state->ms_cstate.cma_ops->cpus_init(cp);
		if (ret != 0) {
			mach_state->ms_cstate.cma_ops = NULL;
			mcpu->max_cstates = CPU_ACPI_C1;
			cpupm_disable(CPUPM_C_STATES);
			idle_cpu = non_deep_idle_cpu;
			disp_enq_thread = non_deep_idle_disp_enq_thread;
		} else if (cpu_deep_cstates_supported()) {
			mcpu->max_cstates = cpu_acpi_get_max_cstates(
			    mach_state->ms_acpi_handle);
			if (mcpu->max_cstates > CPU_ACPI_C1) {
				(void) cstate_timer_callback(
				    CST_EVENT_MULTIPLE_CSTATES);
				CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
				mcpu->mcpu_idle_type = CPU_ACPI_C1;
				disp_enq_thread = cstate_wakeup;
			} else {
				(void) cstate_timer_callback(
				    CST_EVENT_ONE_CSTATE);
			}
			mach_state->ms_caps |= CPUPM_C_STATES;
		} else {
			mcpu->max_cstates = CPU_ACPI_C1;
			idle_cpu = non_deep_idle_cpu;
			disp_enq_thread = non_deep_idle_disp_enq_thread;
		}
	}

	if (mach_state->ms_caps == CPUPM_NO_STATES) {
		cpupm_fini(cp);
		CPUPM_DISABLE();
		first = B_FALSE;
		return;
	}

	if ((mach_state->ms_caps & CPUPM_T_STATES) ||
	    (mach_state->ms_caps & CPUPM_P_STATES) ||
	    (mach_state->ms_caps & CPUPM_C_STATES)) {
		cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp);
		if (first) {
			acpica_write_cpupm_capabilities(
			    mach_state->ms_caps & CPUPM_P_STATES,
			    mach_state->ms_caps & CPUPM_C_STATES);
		}
	}
	first = B_FALSE;
#endif
}

/*
 * Free any resources allocated during cpupm initialization or cpupm start.
 */
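/*
 * When cpupm_stop is B_TRUE the P/T/C-state modules are asked to stop
 * (cpus_stop) rather than tear down completely (cpus_fini); everything
 * else - the notify handlers, the ACPI handle and the machine state
 * itself - is released either way.
 */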
/*ARGSUSED*/
void
cpupm_free(cpu_t *cp, boolean_t cpupm_stop)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;

	if (mach_state == NULL)
		return;

	if (mach_state->ms_pstate.cma_ops != NULL) {
		if (cpupm_stop)
			mach_state->ms_pstate.cma_ops->cpus_stop(cp);
		else
			mach_state->ms_pstate.cma_ops->cpus_fini(cp);
		mach_state->ms_pstate.cma_ops = NULL;
	}

	if (mach_state->ms_tstate.cma_ops != NULL) {
		if (cpupm_stop)
			mach_state->ms_tstate.cma_ops->cpus_stop(cp);
		else
			mach_state->ms_tstate.cma_ops->cpus_fini(cp);
		mach_state->ms_tstate.cma_ops = NULL;
	}

	if (mach_state->ms_cstate.cma_ops != NULL) {
		if (cpupm_stop)
			mach_state->ms_cstate.cma_ops->cpus_stop(cp);
		else
			mach_state->ms_cstate.cma_ops->cpus_fini(cp);

		mach_state->ms_cstate.cma_ops = NULL;
	}

	cpupm_free_notify_handlers(cp);

	if (mach_state->ms_acpi_handle != NULL) {
		cpu_acpi_fini(mach_state->ms_acpi_handle);
		mach_state->ms_acpi_handle = NULL;
	}

	mutex_destroy(&mach_state->ms_lock);
	kmem_free(mach_state, sizeof (cpupm_mach_state_t));
	cp->cpu_m.mcpu_pm_mach_state = NULL;
#endif
}

void
cpupm_fini(cpu_t *cp)
{
	/*
	 * Call the (*cpus_fini)() ops to release the cpupm resources
	 * in the P/C/T-state drivers.
	 */
	cpupm_free(cp, B_FALSE);
}

void
cpupm_start(cpu_t *cp)
{
	cpupm_init(cp);
}

void
cpupm_stop(cpu_t *cp)
{
	/*
	 * Call the (*cpus_stop)() ops to reclaim the cpupm resources
	 * in the P/C/T-state drivers.
	 */
	cpupm_free(cp, B_TRUE);
}

/*
 * If a CPU has started and at least one power state is manageable,
 * then the CPU is ready for power management.
 */
boolean_t
cpupm_is_ready(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	uint32_t cpupm_caps = mach_state->ms_caps;

	if (cpupm_enabled == CPUPM_NO_STATES)
		return (B_FALSE);

	if ((cpupm_caps & CPUPM_T_STATES) ||
	    (cpupm_caps & CPUPM_P_STATES) ||
	    (cpupm_caps & CPUPM_C_STATES))
		return (B_TRUE);
	return (B_FALSE);
#else
	_NOTE(ARGUNUSED(cp));
	return (B_FALSE);
#endif
}

boolean_t
cpupm_is_enabled(uint32_t state)
{
	return ((cpupm_enabled & state) == state);
}

/*
 * All power states are enabled by default.  cpupm_disable() clears the
 * given states from cpupm_enabled and frees their power domain lists.
 */
void
cpupm_disable(uint32_t state)
{
	if (state & CPUPM_P_STATES) {
		cpupm_free_domains(&cpupm_pstate_domains);
	}
	if (state & CPUPM_T_STATES) {
		cpupm_free_domains(&cpupm_tstate_domains);
	}
	if (state & CPUPM_C_STATES) {
		cpupm_free_domains(&cpupm_cstate_domains);
	}
	cpupm_enabled &= ~state;
}
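
/*
 * Dependency domains group CPUs whose P/T/C-state transitions have to be
 * coordinated.  The domain number and coordination type normally come
 * from the ACPI _PSD, _TSD and _CSD objects; when those objects are not
 * available we fall back to one domain per chip (P/T-states) or per core
 * (C-states) and assume hardware coordination (CPU_ACPI_HW_ALL).
 * cpupm_state_change() later uses the coordination type to decide whether
 * a transition is requested on just the calling CPU (CPU_ACPI_SW_ANY) or
 * on every CPU in the domain (CPU_ACPI_SW_ALL and CPU_ACPI_HW_ALL).
 */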

/*
 * Allocate power domains for C, P and T states.
 */
void
cpupm_alloc_domains(cpu_t *cp, int state)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpupm_state_domains_t **dom_ptr;
	cpupm_state_domains_t *dptr;
	cpupm_state_domains_t **mach_dom_state_ptr;
	uint32_t domain;
	uint32_t type;

	switch (state) {
	case CPUPM_P_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) {
			domain = CPU_ACPI_PSD(handle).sd_domain;
			type = CPU_ACPI_PSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_chipid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_pstate_domains;
		mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain;
		break;
	case CPUPM_T_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) {
			domain = CPU_ACPI_TSD(handle).sd_domain;
			type = CPU_ACPI_TSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_chipid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_tstate_domains;
		mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain;
		break;
	case CPUPM_C_STATES:
		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) {
			domain = CPU_ACPI_CSD(handle).sd_domain;
			type = CPU_ACPI_CSD(handle).sd_type;
		} else {
			mutex_enter(&cpu_lock);
			domain = cpuid_get_coreid(cp);
			mutex_exit(&cpu_lock);
			type = CPU_ACPI_HW_ALL;
		}
		dom_ptr = &cpupm_cstate_domains;
		mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain;
		break;
	default:
		return;
	}

	for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
		if (dptr->pm_domain == domain)
			break;
	}

	/* A new domain is created and linked at the head of the list. */
	if (dptr == NULL) {
		dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP);
		dptr->pm_domain = domain;
		dptr->pm_type = type;
		dptr->pm_next = *dom_ptr;
		mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN,
		    (void *)ipltospl(DISP_LEVEL));
		CPUSET_ZERO(dptr->pm_cpus);
		*dom_ptr = dptr;
	}
	CPUSET_ADD(dptr->pm_cpus, cp->cpu_id);
	*mach_dom_state_ptr = dptr;
}

/*
 * Free C, P or T state power domains.
 */
void
cpupm_free_domains(cpupm_state_domains_t **dom_ptr)
{
	cpupm_state_domains_t *this_domain, *next_domain;

	this_domain = *dom_ptr;
	while (this_domain != NULL) {
		next_domain = this_domain->pm_next;
		mutex_destroy(&this_domain->pm_lock);
		kmem_free((void *)this_domain,
		    sizeof (cpupm_state_domains_t));
		this_domain = next_domain;
	}
	*dom_ptr = NULL;
}

/*
 * Remove a CPU from the C, P or T state power domains.
 */
void
cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_state_domains_t *dptr;
	uint32_t pm_domain;
	ulong_t iflag;

	ASSERT(mach_state);

	switch (state) {
	case CPUPM_P_STATES:
		pm_domain = mach_state->ms_pstate.cma_domain->pm_domain;
		break;
	case CPUPM_T_STATES:
		pm_domain = mach_state->ms_tstate.cma_domain->pm_domain;
		break;
	case CPUPM_C_STATES:
		pm_domain = mach_state->ms_cstate.cma_domain->pm_domain;
		break;
	default:
		return;
	}

	/*
	 * Find the CPU's C, P or T state power domain.
	 */
	for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
		if (dptr->pm_domain == pm_domain)
			break;
	}

	/*
	 * Return if no matching domain was found.
	 */
	if (dptr == NULL)
		return;

	/*
	 * We found a matching power domain; remove the CPU from its cpuset.
	 * Interrupts are disabled here to avoid races between event change
	 * notification and CPU removal.
	 */
	iflag = intr_clear();
	mutex_enter(&dptr->pm_lock);
	if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id))
		CPUSET_DEL(dptr->pm_cpus, cp->cpu_id);
	mutex_exit(&dptr->pm_lock);
	intr_restore(iflag);
}

void
cpupm_alloc_ms_cstate(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state;
	cpupm_mach_acpi_state_t *ms_cstate;

	mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	ms_cstate = &mach_state->ms_cstate;
	ASSERT(ms_cstate->cma_state.cstate == NULL);
	ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t),
	    KM_SLEEP);
	ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1;
}

void
cpupm_free_ms_cstate(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate;

	if (ms_cstate->cma_state.cstate != NULL) {
		kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t));
		ms_cstate->cma_state.cstate = NULL;
	}
}

void
cpupm_state_change(cpu_t *cp, int level, int state)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_state_ops_t *state_ops;
	cpupm_state_domains_t *state_domain;
	cpuset_t set;

	DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level);

	if (mach_state == NULL) {
		return;
	}

	switch (state) {
	case CPUPM_P_STATES:
		state_ops = mach_state->ms_pstate.cma_ops;
		state_domain = mach_state->ms_pstate.cma_domain;
		break;
	case CPUPM_T_STATES:
		state_ops = mach_state->ms_tstate.cma_ops;
		state_domain = mach_state->ms_tstate.cma_domain;
		break;
	default:
		break;
	}

	switch (state_domain->pm_type) {
	case CPU_ACPI_SW_ANY:
		/*
		 * A request on any CPU in the domain transitions the domain.
		 */
		CPUSET_ONLY(set, cp->cpu_id);
		state_ops->cpus_change(set, level);
		break;
	case CPU_ACPI_SW_ALL:
		/*
		 * All CPUs in the domain must request the transition.
		 */
	case CPU_ACPI_HW_ALL:
		/*
		 * P/T-state transitions are coordinated by the hardware.
		 * For now, request the transition on all CPUs in the domain,
		 * but looking ahead we can probably be smarter about this.
		 */
		mutex_enter(&state_domain->pm_lock);
		state_ops->cpus_change(state_domain->pm_cpus, level);
		mutex_exit(&state_domain->pm_lock);
		break;
	default:
		cmn_err(CE_NOTE, "Unknown domain coordination type: %d",
		    state_domain->pm_type);
	}
}

/*
 * CPU PM interfaces exposed to the CPU power manager.
 */
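/*
 * A typical enumeration sequence by the generic CPU power manager might
 * look like this (illustrative only):
 *
 *	nstates = cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, NULL);
 *	states = kmem_zalloc(nstates * sizeof (cpupm_state_t), KM_SLEEP);
 *	(void) cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, states);
 *	...
 *	(void) cpupm_plat_change_state(cp, &states[chosen]);
 *
 * Passing NULL for the states array returns just the count so that the
 * caller can size the array before the second call.
 */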
/*ARGSUSED*/
id_t
cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);

	if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) &&
	    !cpupm_is_enabled(CPUPM_C_STATES))) {
		return (CPUPM_NO_DOMAIN);
	}
	if (type == CPUPM_DTYPE_ACTIVE) {
		/*
		 * Return the P-state domain for the specified CPU.
		 */
		if (mach_state->ms_pstate.cma_domain) {
			return (mach_state->ms_pstate.cma_domain->pm_domain);
		}
	} else if (type == CPUPM_DTYPE_IDLE) {
		/*
		 * Return the C-state domain for the specified CPU.
		 */
		if (mach_state->ms_cstate.cma_domain) {
			return (mach_state->ms_cstate.cma_domain->pm_domain);
		}
	}
	return (CPUPM_NO_DOMAIN);
}

/*ARGSUSED*/
uint_t
cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type,
    cpupm_state_t *states)
{
	int	*speeds;
	uint_t	nspeeds, i;

	/*
	 * Idle domain support unimplemented.
	 */
	if (type != CPUPM_DTYPE_ACTIVE) {
		return (0);
	}
	nspeeds = cpupm_get_speeds(cp, &speeds);

	/*
	 * If the caller passes NULL for states, just return the
	 * number of states.
	 */
	if (states != NULL) {
		for (i = 0; i < nspeeds; i++) {
			states[i].cps_speed = speeds[i];
			states[i].cps_handle = (cpupm_handle_t)i;
		}
	}
	cpupm_free_speeds(speeds, nspeeds);
	return (nspeeds);
}

/*ARGSUSED*/
int
cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state)
{
	if (!cpupm_is_ready(cp))
		return (-1);

	cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES);

	return (0);
}

/*ARGSUSED*/
/*
 * Note: It is the responsibility of the callers of cpupm_get_speeds()
 * to free the memory allocated for speeds using cpupm_free_speeds().
 */
uint_t
cpupm_get_speeds(cpu_t *cp, int **speeds)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds));
#else
	return (0);
#endif
}

/*ARGSUSED*/
void
cpupm_free_speeds(int *speeds, uint_t nspeeds)
{
#ifndef __xpv
	cpu_acpi_free_speeds(speeds, nspeeds);
#endif
}

/*
 * Return B_TRUE if P-state management is enabled and this CPU is ready
 * for power management.
 */
boolean_t
cpupm_power_ready(cpu_t *cp)
{
	return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp));
}

/*
 * Return B_TRUE if T-state (throttling) management is enabled and this
 * CPU is ready for power management.
 */
boolean_t
cpupm_throttle_ready(cpu_t *cp)
{
	return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp));
}

/*
 * Return B_TRUE if C-state management is enabled and this CPU is ready
 * for power management.
 */
boolean_t
cpupm_cstate_ready(cpu_t *cp)
{
	return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp));
}
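
/*
 * ACPI notify support.  A single ACPI-level handler, cpupm_notify_handler(),
 * is installed for a CPU the first time cpupm_add_notify_handler() is
 * called for it; the handler simply walks the per-CPU list of registered
 * handlers under ms_lock and fans the notification out to each entry.
 */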
void
cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpupm_notification_t *entry;

	mutex_enter(&mach_state->ms_lock);
	for (entry = mach_state->ms_handlers; entry != NULL;
	    entry = entry->nq_next) {
		entry->nq_handler(obj, val, entry->nq_ctx);
	}
	mutex_exit(&mach_state->ms_lock);
}

/*ARGSUSED*/
void
cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;

	entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
	entry->nq_handler = handler;
	entry->nq_ctx = ctx;
	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		entry->nq_next = NULL;
		mach_state->ms_handlers = entry;
		cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler, cp);
	} else {
		entry->nq_next = mach_state->ms_handlers;
		mach_state->ms_handlers = entry;
	}
	mutex_exit(&mach_state->ms_lock);
#endif
}

/*ARGSUSED*/
static void
cpupm_free_notify_handlers(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpupm_notification_t *entry;
	cpupm_notification_t *next;

	mutex_enter(&mach_state->ms_lock);
	if (mach_state->ms_handlers == NULL) {
		mutex_exit(&mach_state->ms_lock);
		return;
	}
	if (mach_state->ms_acpi_handle != NULL) {
		cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
		    cpupm_notify_handler);
	}
	entry = mach_state->ms_handlers;
	while (entry != NULL) {
		next = entry->nq_next;
		kmem_free(entry, sizeof (cpupm_notification_t));
		entry = next;
	}
	mach_state->ms_handlers = NULL;
	mutex_exit(&mach_state->ms_lock);
#endif
}

/*
 * Get the current max speed from the ACPI _PPC object.
 */
/*ARGSUSED*/
int
cpupm_get_top_speed(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t	*mach_state;
	cpu_acpi_handle_t	handle;
	int			plat_level;
	uint_t			nspeeds;
	int			max_level;

	mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;

	cpu_acpi_cache_ppc(handle);
	plat_level = CPU_ACPI_PPC(handle);

	nspeeds = CPU_ACPI_PSTATES_COUNT(handle);

	max_level = nspeeds - 1;
	if ((plat_level < 0) || (plat_level > max_level)) {
		cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
		    "_PPC out of range %d", cp->cpu_id, plat_level);
		plat_level = 0;
	}

	return (plat_level);
#else
	return (0);
#endif
}

/*
 * This notification handler is called whenever the ACPI _PPC
 * object changes.  The _PPC is a sort of governor on power levels.
 * It sets an upper threshold on which _PSS-defined power levels
 * are usable.  The _PPC value is dynamic and may change as properties
 * (e.g., thermal or AC source) of the system change.
 */
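
/*
 * For example (illustrative): on a system whose _PSS lists four P-states,
 * with index 0 being the fastest, a _PPC value of 1 tells the OS that P0
 * is temporarily unavailable and P1 is the fastest state it may use.
 * cpupm_power_manage_notifications() below reacts by capping the active
 * power state at the index returned by cpupm_get_top_speed().
 */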

static void
cpupm_power_manage_notifications(void *ctx)
{
	cpu_t	*cp = ctx;
	int	top_speed;

	top_speed = cpupm_get_top_speed(cp);
	cpupm_redefine_max_activepwr_state(cp, top_speed);
}

/* ARGSUSED */
static void
cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
{
#ifndef __xpv
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);

	if (mach_state == NULL)
		return;

	/*
	 * Currently, we handle _TPC, _CST and _PPC change notifications.
	 */
	if (val == CPUPM_TPC_CHANGE_NOTIFICATION &&
	    mach_state->ms_caps & CPUPM_T_STATES) {
		cpupm_throttle_manage_notification(ctx);
	} else if (val == CPUPM_CST_CHANGE_NOTIFICATION &&
	    mach_state->ms_caps & CPUPM_C_STATES) {
		cpuidle_manage_cstates(ctx);
	} else if (val == CPUPM_PPC_CHANGE_NOTIFICATION &&
	    mach_state->ms_caps & CPUPM_P_STATES) {
		cpupm_power_manage_notifications(ctx);
	}
#endif
}

/*
 * Update cpupm cstate data each time a CPU exits idle.
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	cs_data->cs_idle_exit = end;
}

/*
 * Determine the next C-state based on cpupm data, and update the cpupm
 * cstate data each time a CPU goes idle.
 * Do as much as possible in this idle-state bookkeeping function, because
 * the performance impact while idle is minimal compared to the wakeup
 * path, where there is real work to do.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
    uint32_t cs_count, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;
	uint32_t i, smpl_cnt;

	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	smpl_cnt = ++cs_data->cs_cnt;
	cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
	scalehrtime(&cs_data->cs_smpl_len);
	if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) {
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy.
		 * The cpu_acpi_cstate_t *cstates array is not required to
		 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
		 * There are cs_count entries in the cstates array.
		 * cs_data->cs_next_cstate contains the index of the next
		 * C-state this CPU should enter.
		 */
		ASSERT(cstates[0].cs_type == CPU_ACPI_C1);

		/*
		 * Will the CPU be idle long enough to save power?
		 */
		ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_idle_time < (cstates[i].cs_latency *
			    cpupm_cs_idle_save_tunable)) {
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, i);
			}
		}

		/*
		 * Does the CPU wake up often (even when the non-idle time is
		 * very short)?  Some producer/consumer type loads fall into
		 * this category.
		 */
		ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_interval <= (cstates[i].cs_latency *
			    cpupm_cs_idle_cost_tunable)) {
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, (CPU_MAX_CSTATES + i));
			}
		}

		/*
		 * Idle percent
		 */
		for (i = 1; i < cs_count; ++i) {
			switch (cstates[i].cs_type) {
			case CPU_ACPI_C2:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C2_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;

			case CPU_ACPI_C3:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C3_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;
			}
		}

		cs_data->cs_next_cstate = cs_count - 1;
	}

	return (cs_data->cs_next_cstate);
}