1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/x86_archext.h> 31 #include <sys/machsystm.h> 32 #include <sys/x_call.h> 33 #include <sys/stat.h> 34 #include <sys/acpi/acpi.h> 35 #include <sys/acpica.h> 36 #include <sys/cpu_acpi.h> 37 #include <sys/cpu_idle.h> 38 #include <sys/cpupm.h> 39 #include <sys/cpu_event.h> 40 #include <sys/hpet.h> 41 #include <sys/archsystm.h> 42 #include <vm/hat_i86.h> 43 #include <sys/dtrace.h> 44 #include <sys/sdt.h> 45 #include <sys/callb.h> 46 47 #define CSTATE_USING_HPET 1 48 #define CSTATE_USING_LAT 2 49 50 extern void cpu_idle_adaptive(void); 51 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data, 52 cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start); 53 54 static int cpu_idle_init(cpu_t *); 55 static void cpu_idle_fini(cpu_t *); 56 static void cpu_idle_stop(cpu_t *); 57 static boolean_t cpu_deep_idle_callb(void *arg, int code); 58 static boolean_t cpu_idle_cpr_callb(void *arg, int code); 59 static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate); 60 61 static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer); 62 63 /* 64 * the flag of always-running local APIC timer. 65 * the flag of HPET Timer use in deep cstate. 66 */ 67 static boolean_t cpu_cstate_arat = B_FALSE; 68 static boolean_t cpu_cstate_hpet = B_FALSE; 69 70 /* 71 * Interfaces for modules implementing Intel's deep c-state. 72 */ 73 cpupm_state_ops_t cpu_idle_ops = { 74 "Generic ACPI C-state Support", 75 cpu_idle_init, 76 cpu_idle_fini, 77 NULL, 78 cpu_idle_stop 79 }; 80 81 static kmutex_t cpu_idle_callb_mutex; 82 static callb_id_t cpu_deep_idle_callb_id; 83 static callb_id_t cpu_idle_cpr_callb_id; 84 static uint_t cpu_idle_cfg_state; 85 86 static kmutex_t cpu_idle_mutex; 87 88 cpu_idle_kstat_t cpu_idle_kstat = { 89 { "address_space_id", KSTAT_DATA_STRING }, 90 { "latency", KSTAT_DATA_UINT32 }, 91 { "power", KSTAT_DATA_UINT32 }, 92 }; 93 94 /* 95 * kstat update function of the c-state info 96 */ 97 static int 98 cpu_idle_kstat_update(kstat_t *ksp, int flag) 99 { 100 cpu_acpi_cstate_t *cstate = ksp->ks_private; 101 102 if (flag == KSTAT_WRITE) { 103 return (EACCES); 104 } 105 106 if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { 107 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 108 "FFixedHW"); 109 } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) { 110 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 111 "SystemIO"); 112 } else { 113 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 114 "Unsupported"); 115 } 116 117 cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency; 118 cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power; 119 120 return (0); 121 } 122 123 /* 124 * Used during configuration callbacks to manage implementation specific 125 * details of the hardware timer used during Deep C-state. 126 */ 127 boolean_t 128 cstate_timer_callback(int code) 129 { 130 if (cpu_cstate_arat) { 131 return (B_TRUE); 132 } else if (cpu_cstate_hpet) { 133 return (hpet.callback(code)); 134 } 135 return (B_FALSE); 136 } 137 138 /* 139 * Some Local APIC Timers do not work during Deep C-states. 140 * The Deep C-state idle function uses this function to ensure it is using a 141 * hardware timer that works during Deep C-states. This function also 142 * switches the timer back to the LACPI Timer after Deep C-state. 143 */ 144 static boolean_t 145 cstate_use_timer(hrtime_t *lapic_expire, int timer) 146 { 147 if (cpu_cstate_arat) 148 return (B_TRUE); 149 150 /* 151 * We have to return B_FALSE if no arat or hpet support 152 */ 153 if (!cpu_cstate_hpet) 154 return (B_FALSE); 155 156 switch (timer) { 157 case CSTATE_USING_HPET: 158 return (hpet.use_hpet_timer(lapic_expire)); 159 case CSTATE_USING_LAT: 160 hpet.use_lapic_timer(*lapic_expire); 161 return (B_TRUE); 162 default: 163 return (B_FALSE); 164 } 165 } 166 167 /* 168 * c-state wakeup function. 169 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals 170 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State. 171 */ 172 void 173 cstate_wakeup(cpu_t *cp, int bound) 174 { 175 struct machcpu *mcpu = &(cp->cpu_m); 176 volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait; 177 cpupart_t *cpu_part; 178 uint_t cpu_found; 179 processorid_t cpu_sid; 180 181 cpu_part = cp->cpu_part; 182 cpu_sid = cp->cpu_seqid; 183 /* 184 * Clear the halted bit for that CPU since it will be woken up 185 * in a moment. 186 */ 187 if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) { 188 /* 189 * Clear the halted bit for that CPU since it will be 190 * poked in a moment. 191 */ 192 bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid); 193 194 /* 195 * We may find the current CPU present in the halted cpuset 196 * if we're in the context of an interrupt that occurred 197 * before we had a chance to clear our bit in cpu_idle(). 198 * Waking ourself is obviously unnecessary, since if 199 * we're here, we're not halted. 200 */ 201 if (cp != CPU) { 202 /* 203 * Use correct wakeup mechanism 204 */ 205 if ((mcpu_mwait != NULL) && 206 (*mcpu_mwait == MWAIT_HALTED)) 207 MWAIT_WAKEUP(cp); 208 else 209 poke_cpu(cp->cpu_id); 210 } 211 return; 212 } else { 213 /* 214 * This cpu isn't halted, but it's idle or undergoing a 215 * context switch. No need to awaken anyone else. 216 */ 217 if (cp->cpu_thread == cp->cpu_idle_thread || 218 cp->cpu_disp_flags & CPU_DISP_DONTSTEAL) 219 return; 220 } 221 222 /* 223 * No need to wake up other CPUs if the thread we just enqueued 224 * is bound. 225 */ 226 if (bound) 227 return; 228 229 230 /* 231 * See if there's any other halted CPUs. If there are, then 232 * select one, and awaken it. 233 * It's possible that after we find a CPU, somebody else 234 * will awaken it before we get the chance. 235 * In that case, look again. 236 */ 237 do { 238 cpu_found = bitset_find(&cpu_part->cp_haltset); 239 if (cpu_found == (uint_t)-1) 240 return; 241 242 } while (bitset_atomic_test_and_del(&cpu_part->cp_haltset, 243 cpu_found) < 0); 244 245 /* 246 * Must use correct wakeup mechanism to avoid lost wakeup of 247 * alternate cpu. 248 */ 249 if (cpu_found != CPU->cpu_seqid) { 250 mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait; 251 if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED)) 252 MWAIT_WAKEUP(cpu_seq[cpu_found]); 253 else 254 poke_cpu(cpu_seq[cpu_found]->cpu_id); 255 } 256 } 257 258 /* 259 * Function called by CPU idle notification framework to check whether CPU 260 * has been awakened. It will be called with interrupt disabled. 261 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle 262 * notification framework. 263 */ 264 static void 265 acpi_cpu_mwait_check_wakeup(void *arg) 266 { 267 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; 268 269 ASSERT(arg != NULL); 270 if (*mcpu_mwait != MWAIT_HALTED) { 271 /* 272 * CPU has been awakened, notify CPU idle notification system. 273 */ 274 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 275 } else { 276 /* 277 * Toggle interrupt flag to detect pending interrupts. 278 * If interrupt happened, do_interrupt() will notify CPU idle 279 * notification framework so no need to call cpu_idle_exit() 280 * here. 281 */ 282 sti(); 283 SMT_PAUSE(); 284 cli(); 285 } 286 } 287 288 static void 289 acpi_cpu_mwait_ipi_check_wakeup(void *arg) 290 { 291 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; 292 293 ASSERT(arg != NULL); 294 if (*mcpu_mwait != MWAIT_WAKEUP_IPI) { 295 /* 296 * CPU has been awakened, notify CPU idle notification system. 297 */ 298 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 299 } else { 300 /* 301 * Toggle interrupt flag to detect pending interrupts. 302 * If interrupt happened, do_interrupt() will notify CPU idle 303 * notification framework so no need to call cpu_idle_exit() 304 * here. 305 */ 306 sti(); 307 SMT_PAUSE(); 308 cli(); 309 } 310 } 311 312 /*ARGSUSED*/ 313 static void 314 acpi_cpu_check_wakeup(void *arg) 315 { 316 /* 317 * Toggle interrupt flag to detect pending interrupts. 318 * If interrupt happened, do_interrupt() will notify CPU idle 319 * notification framework so no need to call cpu_idle_exit() here. 320 */ 321 sti(); 322 SMT_PAUSE(); 323 cli(); 324 } 325 326 /* 327 * enter deep c-state handler 328 */ 329 static void 330 acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) 331 { 332 volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait; 333 cpu_t *cpup = CPU; 334 processorid_t cpu_sid = cpup->cpu_seqid; 335 cpupart_t *cp = cpup->cpu_part; 336 hrtime_t lapic_expire; 337 uint8_t type = cstate->cs_addrspace_id; 338 uint32_t cs_type = cstate->cs_type; 339 int hset_update = 1; 340 boolean_t using_timer; 341 cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup; 342 343 /* 344 * Set our mcpu_mwait here, so we can tell if anyone tries to 345 * wake us between now and when we call mwait. No other cpu will 346 * attempt to set our mcpu_mwait until we add ourself to the haltset. 347 */ 348 if (mcpu_mwait) { 349 if (type == ACPI_ADR_SPACE_SYSTEM_IO) { 350 *mcpu_mwait = MWAIT_WAKEUP_IPI; 351 check_func = &acpi_cpu_mwait_ipi_check_wakeup; 352 } else { 353 *mcpu_mwait = MWAIT_HALTED; 354 check_func = &acpi_cpu_mwait_check_wakeup; 355 } 356 } 357 358 /* 359 * If this CPU is online, and there are multiple CPUs 360 * in the system, then we should note our halting 361 * by adding ourselves to the partition's halted CPU 362 * bitmap. This allows other CPUs to find/awaken us when 363 * work becomes available. 364 */ 365 if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1) 366 hset_update = 0; 367 368 /* 369 * Add ourselves to the partition's halted CPUs bitmask 370 * and set our HALTED flag, if necessary. 371 * 372 * When a thread becomes runnable, it is placed on the queue 373 * and then the halted cpuset is checked to determine who 374 * (if anyone) should be awakened. We therefore need to first 375 * add ourselves to the halted cpuset, and and then check if there 376 * is any work available. 377 * 378 * Note that memory barriers after updating the HALTED flag 379 * are not necessary since an atomic operation (updating the bitmap) 380 * immediately follows. On x86 the atomic operation acts as a 381 * memory barrier for the update of cpu_disp_flags. 382 */ 383 if (hset_update) { 384 cpup->cpu_disp_flags |= CPU_DISP_HALTED; 385 bitset_atomic_add(&cp->cp_haltset, cpu_sid); 386 } 387 388 /* 389 * Check to make sure there's really nothing to do. 390 * Work destined for this CPU may become available after 391 * this check. We'll be notified through the clearing of our 392 * bit in the halted CPU bitmask, and a write to our mcpu_mwait. 393 * 394 * disp_anywork() checks disp_nrunnable, so we do not have to later. 395 */ 396 if (disp_anywork()) { 397 if (hset_update) { 398 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 399 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 400 } 401 return; 402 } 403 404 /* 405 * We're on our way to being halted. 406 * 407 * The local APIC timer can stop in ACPI C2 and deeper c-states. 408 * Try to program the HPET hardware to substitute for this CPU's 409 * LAPIC timer. 410 * cstate_use_timer() could disable the LAPIC Timer. Make sure 411 * to start the LAPIC Timer again before leaving this function. 412 * 413 * Disable interrupts here so we will awaken immediately after halting 414 * if someone tries to poke us between now and the time we actually 415 * halt. 416 */ 417 cli(); 418 using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET); 419 420 /* 421 * We check for the presence of our bit after disabling interrupts. 422 * If it's cleared, we'll return. If the bit is cleared after 423 * we check then the cstate_wakeup() will pop us out of the halted 424 * state. 425 * 426 * This means that the ordering of the cstate_wakeup() and the clearing 427 * of the bit by cpu_wakeup is important. 428 * cpu_wakeup() must clear our mc_haltset bit, and then call 429 * cstate_wakeup(). 430 * acpi_cpu_cstate() must disable interrupts, then check for the bit. 431 */ 432 if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) { 433 (void) cstate_use_timer(&lapic_expire, 434 CSTATE_USING_LAT); 435 sti(); 436 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 437 return; 438 } 439 440 /* 441 * The check for anything locally runnable is here for performance 442 * and isn't needed for correctness. disp_nrunnable ought to be 443 * in our cache still, so it's inexpensive to check, and if there 444 * is anything runnable we won't have to wait for the poke. 445 */ 446 if (cpup->cpu_disp->disp_nrunnable != 0) { 447 (void) cstate_use_timer(&lapic_expire, 448 CSTATE_USING_LAT); 449 sti(); 450 if (hset_update) { 451 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 452 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 453 } 454 return; 455 } 456 457 if (using_timer == B_FALSE) { 458 459 (void) cstate_use_timer(&lapic_expire, 460 CSTATE_USING_LAT); 461 sti(); 462 463 /* 464 * We are currently unable to program the HPET to act as this 465 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper 466 * because no timer is set to wake it up while its LAPIC timer 467 * stalls in deep C-States. 468 * Enter C1 instead. 469 * 470 * cstate_wake_cpu() will wake this CPU with an IPI which 471 * works with MWAIT. 472 */ 473 i86_monitor(mcpu_mwait, 0, 0); 474 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) { 475 if (cpu_idle_enter(IDLE_STATE_C1, 0, 476 check_func, (void *)mcpu_mwait) == 0) { 477 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == 478 MWAIT_HALTED) { 479 i86_mwait(0, 0); 480 } 481 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 482 } 483 } 484 485 /* 486 * We're no longer halted 487 */ 488 if (hset_update) { 489 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 490 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 491 } 492 return; 493 } 494 495 if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) { 496 /* 497 * We're on our way to being halted. 498 * To avoid a lost wakeup, arm the monitor before checking 499 * if another cpu wrote to mcpu_mwait to wake us up. 500 */ 501 i86_monitor(mcpu_mwait, 0, 0); 502 if (*mcpu_mwait == MWAIT_HALTED) { 503 if (cpu_idle_enter((uint_t)cs_type, 0, 504 check_func, (void *)mcpu_mwait) == 0) { 505 if (*mcpu_mwait == MWAIT_HALTED) { 506 i86_mwait(cstate->cs_address, 1); 507 } 508 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 509 } 510 } 511 } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) { 512 uint32_t value; 513 ACPI_TABLE_FADT *gbl_FADT; 514 515 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { 516 if (cpu_idle_enter((uint_t)cs_type, 0, 517 check_func, (void *)mcpu_mwait) == 0) { 518 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { 519 (void) cpu_acpi_read_port( 520 cstate->cs_address, &value, 8); 521 acpica_get_global_FADT(&gbl_FADT); 522 (void) cpu_acpi_read_port( 523 gbl_FADT->XPmTimerBlock.Address, 524 &value, 32); 525 } 526 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 527 } 528 } 529 } 530 531 /* 532 * The LAPIC timer may have stopped in deep c-state. 533 * Reprogram this CPU's LAPIC here before enabling interrupts. 534 */ 535 (void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT); 536 sti(); 537 538 /* 539 * We're no longer halted 540 */ 541 if (hset_update) { 542 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 543 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 544 } 545 } 546 547 /* 548 * Idle the present CPU, deep c-state is supported 549 */ 550 void 551 cpu_acpi_idle(void) 552 { 553 cpu_t *cp = CPU; 554 cpu_acpi_handle_t handle; 555 cma_c_state_t *cs_data; 556 cpu_acpi_cstate_t *cstates; 557 hrtime_t start, end; 558 int cpu_max_cstates; 559 uint32_t cs_indx; 560 uint16_t cs_type; 561 562 cpupm_mach_state_t *mach_state = 563 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 564 handle = mach_state->ms_acpi_handle; 565 ASSERT(CPU_ACPI_CSTATES(handle) != NULL); 566 567 cs_data = mach_state->ms_cstate.cma_state.cstate; 568 cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 569 ASSERT(cstates != NULL); 570 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 571 if (cpu_max_cstates > CPU_MAX_CSTATES) 572 cpu_max_cstates = CPU_MAX_CSTATES; 573 if (cpu_max_cstates == 1) { /* no ACPI c-state data */ 574 (*non_deep_idle_cpu)(); 575 return; 576 } 577 578 start = gethrtime_unscaled(); 579 580 cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start); 581 582 cs_type = cstates[cs_indx].cs_type; 583 584 switch (cs_type) { 585 default: 586 /* FALLTHROUGH */ 587 case CPU_ACPI_C1: 588 (*non_deep_idle_cpu)(); 589 break; 590 591 case CPU_ACPI_C2: 592 acpi_cpu_cstate(&cstates[cs_indx]); 593 break; 594 595 case CPU_ACPI_C3: 596 /* 597 * All supported Intel processors maintain cache coherency 598 * during C3. Currently when entering C3 processors flush 599 * core caches to higher level shared cache. The shared cache 600 * maintains state and supports probes during C3. 601 * Consequently there is no need to handle cache coherency 602 * and Bus Master activity here with the cache flush, BM_RLD 603 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described 604 * in section 8.1.4 of the ACPI Specification 4.0. 605 */ 606 acpi_cpu_cstate(&cstates[cs_indx]); 607 break; 608 } 609 610 end = gethrtime_unscaled(); 611 612 /* 613 * Update statistics 614 */ 615 cpupm_wakeup_cstate_data(cs_data, end); 616 } 617 618 boolean_t 619 cpu_deep_cstates_supported(void) 620 { 621 extern int idle_cpu_no_deep_c; 622 623 if (idle_cpu_no_deep_c) 624 return (B_FALSE); 625 626 if (!cpuid_deep_cstates_supported()) 627 return (B_FALSE); 628 629 if (cpuid_arat_supported()) { 630 cpu_cstate_arat = B_TRUE; 631 return (B_TRUE); 632 } 633 634 if ((hpet.supported == HPET_FULL_SUPPORT) && 635 hpet.install_proxy()) { 636 cpu_cstate_hpet = B_TRUE; 637 return (B_TRUE); 638 } 639 640 return (B_FALSE); 641 } 642 643 /* 644 * Validate that this processor supports deep cstate and if so, 645 * get the c-state data from ACPI and cache it. 646 */ 647 static int 648 cpu_idle_init(cpu_t *cp) 649 { 650 cpupm_mach_state_t *mach_state = 651 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 652 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 653 cpu_acpi_cstate_t *cstate; 654 char name[KSTAT_STRLEN]; 655 int cpu_max_cstates, i; 656 int ret; 657 658 /* 659 * Cache the C-state specific ACPI data. 660 */ 661 if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) { 662 if (ret < 0) 663 cmn_err(CE_NOTE, 664 "!Support for CPU deep idle states is being " 665 "disabled due to errors parsing ACPI C-state " 666 "objects exported by BIOS."); 667 cpu_idle_fini(cp); 668 return (-1); 669 } 670 671 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 672 673 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 674 675 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 676 (void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type); 677 /* 678 * Allocate, initialize and install cstate kstat 679 */ 680 cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id, 681 name, "misc", 682 KSTAT_TYPE_NAMED, 683 sizeof (cpu_idle_kstat) / sizeof (kstat_named_t), 684 KSTAT_FLAG_VIRTUAL); 685 686 if (cstate->cs_ksp == NULL) { 687 cmn_err(CE_NOTE, "kstat_create(c_state) fail"); 688 } else { 689 cstate->cs_ksp->ks_data = &cpu_idle_kstat; 690 cstate->cs_ksp->ks_lock = &cpu_idle_mutex; 691 cstate->cs_ksp->ks_update = cpu_idle_kstat_update; 692 cstate->cs_ksp->ks_data_size += MAXNAMELEN; 693 cstate->cs_ksp->ks_private = cstate; 694 kstat_install(cstate->cs_ksp); 695 cstate++; 696 } 697 } 698 699 cpupm_alloc_domains(cp, CPUPM_C_STATES); 700 cpupm_alloc_ms_cstate(cp); 701 702 if (cpu_deep_cstates_supported()) { 703 uint32_t value; 704 705 mutex_enter(&cpu_idle_callb_mutex); 706 if (cpu_deep_idle_callb_id == (callb_id_t)0) 707 cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb, 708 (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle"); 709 if (cpu_idle_cpr_callb_id == (callb_id_t)0) 710 cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb, 711 (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr"); 712 mutex_exit(&cpu_idle_callb_mutex); 713 714 715 /* 716 * All supported CPUs (Nehalem and later) will remain in C3 717 * during Bus Master activity. 718 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it 719 * is not already 0 before enabling Deeper C-states. 720 */ 721 cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value); 722 if (value & 1) 723 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 724 } 725 726 return (0); 727 } 728 729 /* 730 * Free resources allocated by cpu_idle_init(). 731 */ 732 static void 733 cpu_idle_fini(cpu_t *cp) 734 { 735 cpupm_mach_state_t *mach_state = 736 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 737 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 738 cpu_acpi_cstate_t *cstate; 739 uint_t cpu_max_cstates, i; 740 741 /* 742 * idle cpu points back to the generic one 743 */ 744 idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 745 disp_enq_thread = non_deep_idle_disp_enq_thread; 746 747 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 748 if (cstate) { 749 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 750 751 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 752 if (cstate->cs_ksp != NULL) 753 kstat_delete(cstate->cs_ksp); 754 cstate++; 755 } 756 } 757 758 cpupm_free_ms_cstate(cp); 759 cpupm_free_domains(&cpupm_cstate_domains); 760 cpu_acpi_free_cstate_data(handle); 761 762 mutex_enter(&cpu_idle_callb_mutex); 763 if (cpu_deep_idle_callb_id != (callb_id_t)0) { 764 (void) callb_delete(cpu_deep_idle_callb_id); 765 cpu_deep_idle_callb_id = (callb_id_t)0; 766 } 767 if (cpu_idle_cpr_callb_id != (callb_id_t)0) { 768 (void) callb_delete(cpu_idle_cpr_callb_id); 769 cpu_idle_cpr_callb_id = (callb_id_t)0; 770 } 771 mutex_exit(&cpu_idle_callb_mutex); 772 } 773 774 static void 775 cpu_idle_stop(cpu_t *cp) 776 { 777 cpupm_mach_state_t *mach_state = 778 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 779 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 780 cpu_acpi_cstate_t *cstate; 781 uint_t cpu_max_cstates, i; 782 783 /* 784 * place the CPUs in a safe place so that we can disable 785 * deep c-state on them. 786 */ 787 pause_cpus(NULL); 788 cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 789 start_cpus(); 790 791 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 792 if (cstate) { 793 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 794 795 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 796 if (cstate->cs_ksp != NULL) 797 kstat_delete(cstate->cs_ksp); 798 cstate++; 799 } 800 } 801 cpupm_free_ms_cstate(cp); 802 cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains); 803 cpu_acpi_free_cstate_data(handle); 804 } 805 806 /*ARGSUSED*/ 807 static boolean_t 808 cpu_deep_idle_callb(void *arg, int code) 809 { 810 boolean_t rslt = B_TRUE; 811 812 mutex_enter(&cpu_idle_callb_mutex); 813 switch (code) { 814 case PM_DEFAULT_CPU_DEEP_IDLE: 815 /* 816 * Default policy is same as enable 817 */ 818 /*FALLTHROUGH*/ 819 case PM_ENABLE_CPU_DEEP_IDLE: 820 if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0) 821 break; 822 823 if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) { 824 disp_enq_thread = cstate_wakeup; 825 idle_cpu = cpu_idle_adaptive; 826 cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG; 827 } else { 828 rslt = B_FALSE; 829 } 830 break; 831 832 case PM_DISABLE_CPU_DEEP_IDLE: 833 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 834 break; 835 836 idle_cpu = non_deep_idle_cpu; 837 if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) { 838 disp_enq_thread = non_deep_idle_disp_enq_thread; 839 cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG; 840 } 841 break; 842 843 default: 844 cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n", 845 code); 846 break; 847 } 848 mutex_exit(&cpu_idle_callb_mutex); 849 return (rslt); 850 } 851 852 /*ARGSUSED*/ 853 static boolean_t 854 cpu_idle_cpr_callb(void *arg, int code) 855 { 856 boolean_t rslt = B_TRUE; 857 858 mutex_enter(&cpu_idle_callb_mutex); 859 switch (code) { 860 case CB_CODE_CPR_RESUME: 861 if (cstate_timer_callback(CB_CODE_CPR_RESUME)) { 862 /* 863 * Do not enable dispatcher hooks if disabled by user. 864 */ 865 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 866 break; 867 868 disp_enq_thread = cstate_wakeup; 869 idle_cpu = cpu_idle_adaptive; 870 } else { 871 rslt = B_FALSE; 872 } 873 break; 874 875 case CB_CODE_CPR_CHKPT: 876 idle_cpu = non_deep_idle_cpu; 877 disp_enq_thread = non_deep_idle_disp_enq_thread; 878 (void) cstate_timer_callback(CB_CODE_CPR_CHKPT); 879 break; 880 881 default: 882 cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code); 883 break; 884 } 885 mutex_exit(&cpu_idle_callb_mutex); 886 return (rslt); 887 } 888 889 /* 890 * handle _CST notification 891 */ 892 void 893 cpuidle_cstate_instance(cpu_t *cp) 894 { 895 #ifndef __xpv 896 cpupm_mach_state_t *mach_state = 897 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 898 cpu_acpi_handle_t handle; 899 struct machcpu *mcpu; 900 cpuset_t dom_cpu_set; 901 kmutex_t *pm_lock; 902 int result = 0; 903 processorid_t cpu_id; 904 905 if (mach_state == NULL) { 906 return; 907 } 908 909 ASSERT(mach_state->ms_cstate.cma_domain != NULL); 910 dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus; 911 pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock; 912 913 /* 914 * Do for all the CPU's in the domain 915 */ 916 mutex_enter(pm_lock); 917 do { 918 CPUSET_FIND(dom_cpu_set, cpu_id); 919 if (cpu_id == CPUSET_NOTINSET) 920 break; 921 922 ASSERT(cpu_id >= 0 && cpu_id < NCPU); 923 cp = cpu[cpu_id]; 924 mach_state = (cpupm_mach_state_t *) 925 cp->cpu_m.mcpu_pm_mach_state; 926 if (!(mach_state->ms_caps & CPUPM_C_STATES)) { 927 mutex_exit(pm_lock); 928 return; 929 } 930 handle = mach_state->ms_acpi_handle; 931 ASSERT(handle != NULL); 932 933 /* 934 * re-evaluate cstate object 935 */ 936 if (cpu_acpi_cache_cstate_data(handle) != 0) { 937 cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state" 938 " object Instance: %d", cpu_id); 939 } 940 mutex_enter(&cpu_lock); 941 mcpu = &(cp->cpu_m); 942 mcpu->max_cstates = cpu_acpi_get_max_cstates(handle); 943 if (mcpu->max_cstates > CPU_ACPI_C1) { 944 (void) cstate_timer_callback( 945 CST_EVENT_MULTIPLE_CSTATES); 946 disp_enq_thread = cstate_wakeup; 947 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 948 } else if (mcpu->max_cstates == CPU_ACPI_C1) { 949 disp_enq_thread = non_deep_idle_disp_enq_thread; 950 cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 951 (void) cstate_timer_callback(CST_EVENT_ONE_CSTATE); 952 } 953 mutex_exit(&cpu_lock); 954 955 CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result); 956 } while (result < 0); 957 mutex_exit(pm_lock); 958 #endif 959 } 960 961 /* 962 * handle the number or the type of available processor power states change 963 */ 964 void 965 cpuidle_manage_cstates(void *ctx) 966 { 967 cpu_t *cp = ctx; 968 cpupm_mach_state_t *mach_state = 969 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 970 boolean_t is_ready; 971 972 if (mach_state == NULL) { 973 return; 974 } 975 976 /* 977 * We currently refuse to power manage if the CPU is not ready to 978 * take cross calls (cross calls fail silently if CPU is not ready 979 * for it). 980 * 981 * Additionally, for x86 platforms we cannot power manage an instance, 982 * until it has been initialized. 983 */ 984 is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp); 985 if (!is_ready) 986 return; 987 988 cpuidle_cstate_instance(cp); 989 } 990