/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>

#define	CSTATE_USING_HPET	1
#define	CSTATE_USING_LAT	2

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);

static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);

/*
 * Flags: whether the local APIC timer is always running (ARAT), and whether
 * the HPET timer is used while in deep C-states.
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;

/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL
};

static kmutex_t cpu_idle_callb_mutex;
static callb_id_t cpu_deep_idle_callb_id;
static callb_id_t cpu_idle_cpr_callb_id;
static uint_t cpu_idle_cfg_state;

static kmutex_t cpu_idle_mutex;

cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id",	KSTAT_DATA_STRING },
	{ "latency",		KSTAT_DATA_UINT32 },
	{ "power",		KSTAT_DATA_UINT32 },
};

/*
 * kstat update function of the c-state info
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;

	return (0);
}

/*
 * Used during configuration callbacks to manage implementation specific
 * details of the hardware timer used during Deep C-state.
 */
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat) {
		return (B_TRUE);
	} else if (cpu_cstate_hpet) {
		return (hpet.callback(code));
	}
	return (B_FALSE);
}

/*
 * Some Local APIC Timers do not work during Deep C-states.
 * The Deep C-state idle function uses this function to ensure it is using a
 * hardware timer that works during Deep C-states. This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
 */
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	/*
	 * We have to return B_FALSE if no arat or hpet support
	 */
	if (!cpu_cstate_hpet)
		return (B_FALSE);

	switch (timer) {
	case CSTATE_USING_HPET:
		return (hpet.use_hpet_timer(lapic_expire));
	case CSTATE_USING_LAT:
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}

/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu *mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t *cpu_part;
	uint_t cpu_found;
	processorid_t cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	/*
	 * Clear the halted bit for that CPU since it will be woken up
	 * in a moment.
	 */
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;


	/*
	 * See if there are any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}

/*
 * Function called by CPU idle notification framework to check whether CPU
 * has been awakened. It will be called with interrupts disabled.
 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
 * notification framework.
 */
static void
acpi_cpu_mwait_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_HALTED) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If interrupt happened, do_interrupt() will notify CPU idle
		 * notification framework so no need to call cpu_idle_exit()
		 * here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

static void
acpi_cpu_mwait_ipi_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If interrupt happened, do_interrupt() will notify CPU idle
		 * notification framework so no need to call cpu_idle_exit()
		 * here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

/*ARGSUSED*/
static void
acpi_cpu_check_wakeup(void *arg)
{
	/*
	 * Toggle interrupt flag to detect pending interrupts.
	 * If interrupt happened, do_interrupt() will notify CPU idle
	 * notification framework so no need to call cpu_idle_exit() here.
	 */
	sti();
	SMT_PAUSE();
	cli();
}

/*
 * enter deep c-state handler
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	hrtime_t lapic_expire;
	uint8_t type = cstate->cs_addrspace_id;
	uint32_t cs_type = cstate->cs_type;
	int hset_update = 1;
	boolean_t using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait. No other cpu will
	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			*mcpu_mwait = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows. On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Try to program the HPET hardware to substitute for this CPU's
	 * LAPIC timer.
	 * cstate_use_timer() could disable the LAPIC Timer. Make sure
	 * to start the LAPIC Timer again before leaving this function.
	 *
	 * Disable interrupts here so we will awaken immediately after halting
	 * if someone tries to poke us between now and the time we actually
	 * halt.
	 */
	cli();
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering of the cstate_wakeup() and the clearing
	 * of the bit by cpu_wakeup is important.
	 * cpu_wakeup() must clear our mc_haltset bit, and then call
	 * cstate_wakeup().
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_timer == B_FALSE) {

		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its LAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wake_cpu() will wake this CPU with an IPI which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			if (cpu_idle_enter(IDLE_STATE_C1, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
				    MWAIT_HALTED) {
					i86_mwait(0, 0);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}

		/*
		 * We're no longer halted
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_HALTED) {
					i86_mwait(cstate->cs_address, 1);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
					(void) cpu_acpi_read_port(
					    cstate->cs_address, &value, 8);
					acpica_get_global_FADT(&gbl_FADT);
					(void) cpu_acpi_read_port(
					    gbl_FADT->XPmTimerBlock.Address,
					    &value, 32);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	}

	/*
	 * The LAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's LAPIC here before enabling interrupts.
	 */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();

	/*
	 * We're no longer halted
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * Indicate whether bus master activity has occurred; clear the status
 * bit if it was set.
 */
static uint32_t
cpu_acpi_bm_sts(void)
{
	uint32_t bm_sts = 0;

	cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_sts);

	if (bm_sts)
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);

	return (bm_sts);
}

/*
 * Idle the present CPU; deep C-states are supported.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	/*
	 * OSPM uses the BM_STS bit to determine the power state to enter
	 * when considering a transition to or from the C2/C3 power state.
	 * If C3 is chosen, bus master activity demotes the power state
	 * to C2.
	 */
	if ((cstates[cs_indx].cs_type >= CPU_ACPI_C3) && cpu_acpi_bm_sts())
		--cs_indx;
	cs_type = cstates[cs_indx].cs_type;

	/*
	 * BM_RLD determines if the Cx power state was exited as a result of
	 * bus master requests. Set this bit when using a C3 power state, and
	 * clear it when using a C1 or C2 power state.
	 */
	if ((CPU_ACPI_BM_INFO(handle) & BM_RLD) && (cs_type < CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
		CPU_ACPI_BM_INFO(handle) &= ~BM_RLD;
	}

	if ((!(CPU_ACPI_BM_INFO(handle) & BM_RLD)) &&
	    (cs_type >= CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		CPU_ACPI_BM_INFO(handle) |= BM_RLD;
	}

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * As recommended by the ACPI spec, use the hardware bus
		 * master arbitration disable mechanism to prevent bus
		 * masters from writing to memory (UP-only).
		 */
		if ((ncpus_online == 1) &&
		    (CPU_ACPI_BM_INFO(handle) & BM_CTL)) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			CPU_ACPI_BM_INFO(handle) |= BM_ARB_DIS;
		/*
		 * Today all Intel processors that support C3 share cache.
		 */
		} else if (x86_vendor != X86_VENDOR_Intel) {
			__acpi_wbinvd();
		}
		acpi_cpu_cstate(&cstates[cs_indx]);
		if (CPU_ACPI_BM_INFO(handle) & BM_ARB_DIS) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
			CPU_ACPI_BM_INFO(handle) &= ~BM_ARB_DIS;
		}
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}

boolean_t
cpu_deep_cstates_supported(void)
{
	extern int idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c)
		return (B_FALSE);

	if (!cpuid_deep_cstates_supported())
		return (B_FALSE);

	if (cpuid_arat_supported()) {
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if ((hpet.supported == HPET_FULL_SUPPORT) &&
	    hpet.install_proxy()) {
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Validate that this processor supports deep cstate and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	ACPI_TABLE_FADT *gbl_FADT;

	/*
	 * Cache the C-state specific ACPI data.
	 */
	if (cpu_acpi_cache_cstate_data(handle) != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_idle_init: Failed to cache ACPI C-state data\n");
		cpu_idle_fini(cp);
		return (-1);
	}

	/*
	 * Check the bus master arbitration control ability.
	 */
	acpica_get_global_FADT(&gbl_FADT);
	if (gbl_FADT->Pm2ControlBlock && gbl_FADT->Pm2ControlLength)
		CPU_ACPI_BM_INFO(handle) |= BM_CTL;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);
	}

	return (0);
}

/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * idle cpu points back to the generic one
	 */
	idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}

/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*
 * handle _CST notification
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef	__xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle;
	struct machcpu *mcpu;
	cpuset_t dom_cpu_set;
	kmutex_t *pm_lock;
	int result = 0;
	processorid_t cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do for all the CPUs in the domain
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * Re-evaluate the cstate object
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mutex_enter(&cpu_lock);
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}
		mutex_exit(&cpu_lock);

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
		mutex_exit(pm_lock);
	} while (result < 0);
#endif
}

/*
 * Handle a change in the number or type of available processor power states.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t is_ready;

	if (mach_state == NULL) {
		return;
	}

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if CPU is not ready
	 * for it).
	 *
	 * Additionally, for x86 platforms we cannot power manage
	 * any one instance until all instances have been initialized.
	 * That's because we don't know what the CPU domains look like
	 * until all instances have been initialized.
	 */
	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready();
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}