1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/x86_archext.h> 31 #include <sys/machsystm.h> 32 #include <sys/x_call.h> 33 #include <sys/stat.h> 34 #include <sys/acpi/acpi.h> 35 #include <sys/acpica.h> 36 #include <sys/cpu_acpi.h> 37 #include <sys/cpu_idle.h> 38 #include <sys/cpupm.h> 39 #include <sys/cpu_event.h> 40 #include <sys/hpet.h> 41 #include <sys/archsystm.h> 42 #include <vm/hat_i86.h> 43 #include <sys/dtrace.h> 44 #include <sys/sdt.h> 45 #include <sys/callb.h> 46 47 #define CSTATE_USING_HPET 1 48 #define CSTATE_USING_LAT 2 49 50 extern void cpu_idle_adaptive(void); 51 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data, 52 cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start); 53 54 static int cpu_idle_init(cpu_t *); 55 static void cpu_idle_fini(cpu_t *); 56 static boolean_t cpu_deep_idle_callb(void *arg, int code); 57 static boolean_t cpu_idle_cpr_callb(void *arg, int code); 58 static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate); 59 60 static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer); 61 62 /* 63 * the flag of always-running local APIC timer. 64 * the flag of HPET Timer use in deep cstate. 65 */ 66 static boolean_t cpu_cstate_arat = B_FALSE; 67 static boolean_t cpu_cstate_hpet = B_FALSE; 68 69 /* 70 * Interfaces for modules implementing Intel's deep c-state. 71 */ 72 cpupm_state_ops_t cpu_idle_ops = { 73 "Generic ACPI C-state Support", 74 cpu_idle_init, 75 cpu_idle_fini, 76 NULL 77 }; 78 79 static kmutex_t cpu_idle_callb_mutex; 80 static callb_id_t cpu_deep_idle_callb_id; 81 static callb_id_t cpu_idle_cpr_callb_id; 82 static uint_t cpu_idle_cfg_state; 83 84 static kmutex_t cpu_idle_mutex; 85 86 cpu_idle_kstat_t cpu_idle_kstat = { 87 { "address_space_id", KSTAT_DATA_STRING }, 88 { "latency", KSTAT_DATA_UINT32 }, 89 { "power", KSTAT_DATA_UINT32 }, 90 }; 91 92 /* 93 * kstat update function of the c-state info 94 */ 95 static int 96 cpu_idle_kstat_update(kstat_t *ksp, int flag) 97 { 98 cpu_acpi_cstate_t *cstate = ksp->ks_private; 99 100 if (flag == KSTAT_WRITE) { 101 return (EACCES); 102 } 103 104 if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { 105 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 106 "FFixedHW"); 107 } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) { 108 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 109 "SystemIO"); 110 } else { 111 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 112 "Unsupported"); 113 } 114 115 cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency; 116 cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power; 117 118 return (0); 119 } 120 121 /* 122 * Used during configuration callbacks to manage implementation specific 123 * details of the hardware timer used during Deep C-state. 124 */ 125 boolean_t 126 cstate_timer_callback(int code) 127 { 128 if (cpu_cstate_arat) { 129 return (B_TRUE); 130 } else if (cpu_cstate_hpet) { 131 return (hpet.callback(code)); 132 } 133 return (B_FALSE); 134 } 135 136 /* 137 * Some Local APIC Timers do not work during Deep C-states. 138 * The Deep C-state idle function uses this function to ensure it is using a 139 * hardware timer that works during Deep C-states. This function also 140 * switches the timer back to the LACPI Timer after Deep C-state. 141 */ 142 static boolean_t 143 cstate_use_timer(hrtime_t *lapic_expire, int timer) 144 { 145 if (cpu_cstate_arat) 146 return (B_TRUE); 147 148 /* 149 * We have to return B_FALSE if no arat or hpet support 150 */ 151 if (!cpu_cstate_hpet) 152 return (B_FALSE); 153 154 switch (timer) { 155 case CSTATE_USING_HPET: 156 return (hpet.use_hpet_timer(lapic_expire)); 157 case CSTATE_USING_LAT: 158 hpet.use_lapic_timer(*lapic_expire); 159 return (B_TRUE); 160 default: 161 return (B_FALSE); 162 } 163 } 164 165 /* 166 * c-state wakeup function. 167 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals 168 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State. 169 */ 170 void 171 cstate_wakeup(cpu_t *cp, int bound) 172 { 173 struct machcpu *mcpu = &(cp->cpu_m); 174 volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait; 175 cpupart_t *cpu_part; 176 uint_t cpu_found; 177 processorid_t cpu_sid; 178 179 cpu_part = cp->cpu_part; 180 cpu_sid = cp->cpu_seqid; 181 /* 182 * Clear the halted bit for that CPU since it will be woken up 183 * in a moment. 184 */ 185 if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) { 186 /* 187 * Clear the halted bit for that CPU since it will be 188 * poked in a moment. 189 */ 190 bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid); 191 192 /* 193 * We may find the current CPU present in the halted cpuset 194 * if we're in the context of an interrupt that occurred 195 * before we had a chance to clear our bit in cpu_idle(). 196 * Waking ourself is obviously unnecessary, since if 197 * we're here, we're not halted. 198 */ 199 if (cp != CPU) { 200 /* 201 * Use correct wakeup mechanism 202 */ 203 if ((mcpu_mwait != NULL) && 204 (*mcpu_mwait == MWAIT_HALTED)) 205 MWAIT_WAKEUP(cp); 206 else 207 poke_cpu(cp->cpu_id); 208 } 209 return; 210 } else { 211 /* 212 * This cpu isn't halted, but it's idle or undergoing a 213 * context switch. No need to awaken anyone else. 214 */ 215 if (cp->cpu_thread == cp->cpu_idle_thread || 216 cp->cpu_disp_flags & CPU_DISP_DONTSTEAL) 217 return; 218 } 219 220 /* 221 * No need to wake up other CPUs if the thread we just enqueued 222 * is bound. 223 */ 224 if (bound) 225 return; 226 227 228 /* 229 * See if there's any other halted CPUs. If there are, then 230 * select one, and awaken it. 231 * It's possible that after we find a CPU, somebody else 232 * will awaken it before we get the chance. 233 * In that case, look again. 234 */ 235 do { 236 cpu_found = bitset_find(&cpu_part->cp_haltset); 237 if (cpu_found == (uint_t)-1) 238 return; 239 240 } while (bitset_atomic_test_and_del(&cpu_part->cp_haltset, 241 cpu_found) < 0); 242 243 /* 244 * Must use correct wakeup mechanism to avoid lost wakeup of 245 * alternate cpu. 246 */ 247 if (cpu_found != CPU->cpu_seqid) { 248 mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait; 249 if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED)) 250 MWAIT_WAKEUP(cpu_seq[cpu_found]); 251 else 252 poke_cpu(cpu_seq[cpu_found]->cpu_id); 253 } 254 } 255 256 /* 257 * Function called by CPU idle notification framework to check whether CPU 258 * has been awakened. It will be called with interrupt disabled. 259 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle 260 * notification framework. 261 */ 262 static void 263 acpi_cpu_mwait_check_wakeup(void *arg) 264 { 265 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; 266 267 ASSERT(arg != NULL); 268 if (*mcpu_mwait != MWAIT_HALTED) { 269 /* 270 * CPU has been awakened, notify CPU idle notification system. 271 */ 272 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 273 } else { 274 /* 275 * Toggle interrupt flag to detect pending interrupts. 276 * If interrupt happened, do_interrupt() will notify CPU idle 277 * notification framework so no need to call cpu_idle_exit() 278 * here. 279 */ 280 sti(); 281 SMT_PAUSE(); 282 cli(); 283 } 284 } 285 286 static void 287 acpi_cpu_mwait_ipi_check_wakeup(void *arg) 288 { 289 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; 290 291 ASSERT(arg != NULL); 292 if (*mcpu_mwait != MWAIT_WAKEUP_IPI) { 293 /* 294 * CPU has been awakened, notify CPU idle notification system. 295 */ 296 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 297 } else { 298 /* 299 * Toggle interrupt flag to detect pending interrupts. 300 * If interrupt happened, do_interrupt() will notify CPU idle 301 * notification framework so no need to call cpu_idle_exit() 302 * here. 303 */ 304 sti(); 305 SMT_PAUSE(); 306 cli(); 307 } 308 } 309 310 /*ARGSUSED*/ 311 static void 312 acpi_cpu_check_wakeup(void *arg) 313 { 314 /* 315 * Toggle interrupt flag to detect pending interrupts. 316 * If interrupt happened, do_interrupt() will notify CPU idle 317 * notification framework so no need to call cpu_idle_exit() here. 318 */ 319 sti(); 320 SMT_PAUSE(); 321 cli(); 322 } 323 324 /* 325 * enter deep c-state handler 326 */ 327 static void 328 acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) 329 { 330 volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait; 331 cpu_t *cpup = CPU; 332 processorid_t cpu_sid = cpup->cpu_seqid; 333 cpupart_t *cp = cpup->cpu_part; 334 hrtime_t lapic_expire; 335 uint8_t type = cstate->cs_addrspace_id; 336 uint32_t cs_type = cstate->cs_type; 337 int hset_update = 1; 338 boolean_t using_timer; 339 cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup; 340 341 /* 342 * Set our mcpu_mwait here, so we can tell if anyone tries to 343 * wake us between now and when we call mwait. No other cpu will 344 * attempt to set our mcpu_mwait until we add ourself to the haltset. 345 */ 346 if (mcpu_mwait) { 347 if (type == ACPI_ADR_SPACE_SYSTEM_IO) { 348 *mcpu_mwait = MWAIT_WAKEUP_IPI; 349 check_func = &acpi_cpu_mwait_ipi_check_wakeup; 350 } else { 351 *mcpu_mwait = MWAIT_HALTED; 352 check_func = &acpi_cpu_mwait_check_wakeup; 353 } 354 } 355 356 /* 357 * If this CPU is online, and there are multiple CPUs 358 * in the system, then we should note our halting 359 * by adding ourselves to the partition's halted CPU 360 * bitmap. This allows other CPUs to find/awaken us when 361 * work becomes available. 362 */ 363 if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1) 364 hset_update = 0; 365 366 /* 367 * Add ourselves to the partition's halted CPUs bitmask 368 * and set our HALTED flag, if necessary. 369 * 370 * When a thread becomes runnable, it is placed on the queue 371 * and then the halted cpuset is checked to determine who 372 * (if anyone) should be awakened. We therefore need to first 373 * add ourselves to the halted cpuset, and and then check if there 374 * is any work available. 375 * 376 * Note that memory barriers after updating the HALTED flag 377 * are not necessary since an atomic operation (updating the bitmap) 378 * immediately follows. On x86 the atomic operation acts as a 379 * memory barrier for the update of cpu_disp_flags. 380 */ 381 if (hset_update) { 382 cpup->cpu_disp_flags |= CPU_DISP_HALTED; 383 bitset_atomic_add(&cp->cp_haltset, cpu_sid); 384 } 385 386 /* 387 * Check to make sure there's really nothing to do. 388 * Work destined for this CPU may become available after 389 * this check. We'll be notified through the clearing of our 390 * bit in the halted CPU bitmask, and a write to our mcpu_mwait. 391 * 392 * disp_anywork() checks disp_nrunnable, so we do not have to later. 393 */ 394 if (disp_anywork()) { 395 if (hset_update) { 396 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 397 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 398 } 399 return; 400 } 401 402 /* 403 * We're on our way to being halted. 404 * 405 * The local APIC timer can stop in ACPI C2 and deeper c-states. 406 * Try to program the HPET hardware to substitute for this CPU's 407 * LAPIC timer. 408 * cstate_use_timer() could disable the LAPIC Timer. Make sure 409 * to start the LAPIC Timer again before leaving this function. 410 * 411 * Disable interrupts here so we will awaken immediately after halting 412 * if someone tries to poke us between now and the time we actually 413 * halt. 414 */ 415 cli(); 416 using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET); 417 418 /* 419 * We check for the presence of our bit after disabling interrupts. 420 * If it's cleared, we'll return. If the bit is cleared after 421 * we check then the cstate_wakeup() will pop us out of the halted 422 * state. 423 * 424 * This means that the ordering of the cstate_wakeup() and the clearing 425 * of the bit by cpu_wakeup is important. 426 * cpu_wakeup() must clear our mc_haltset bit, and then call 427 * cstate_wakeup(). 428 * acpi_cpu_cstate() must disable interrupts, then check for the bit. 429 */ 430 if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) { 431 (void) cstate_use_timer(&lapic_expire, 432 CSTATE_USING_LAT); 433 sti(); 434 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 435 return; 436 } 437 438 /* 439 * The check for anything locally runnable is here for performance 440 * and isn't needed for correctness. disp_nrunnable ought to be 441 * in our cache still, so it's inexpensive to check, and if there 442 * is anything runnable we won't have to wait for the poke. 443 */ 444 if (cpup->cpu_disp->disp_nrunnable != 0) { 445 (void) cstate_use_timer(&lapic_expire, 446 CSTATE_USING_LAT); 447 sti(); 448 if (hset_update) { 449 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 450 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 451 } 452 return; 453 } 454 455 if (using_timer == B_FALSE) { 456 457 (void) cstate_use_timer(&lapic_expire, 458 CSTATE_USING_LAT); 459 sti(); 460 461 /* 462 * We are currently unable to program the HPET to act as this 463 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper 464 * because no timer is set to wake it up while its LAPIC timer 465 * stalls in deep C-States. 466 * Enter C1 instead. 467 * 468 * cstate_wake_cpu() will wake this CPU with an IPI which 469 * works with MWAIT. 470 */ 471 i86_monitor(mcpu_mwait, 0, 0); 472 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) { 473 if (cpu_idle_enter(IDLE_STATE_C1, 0, 474 check_func, (void *)mcpu_mwait) == 0) { 475 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == 476 MWAIT_HALTED) { 477 i86_mwait(0, 0); 478 } 479 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 480 } 481 } 482 483 /* 484 * We're no longer halted 485 */ 486 if (hset_update) { 487 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 488 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 489 } 490 return; 491 } 492 493 if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) { 494 /* 495 * We're on our way to being halted. 496 * To avoid a lost wakeup, arm the monitor before checking 497 * if another cpu wrote to mcpu_mwait to wake us up. 498 */ 499 i86_monitor(mcpu_mwait, 0, 0); 500 if (*mcpu_mwait == MWAIT_HALTED) { 501 if (cpu_idle_enter((uint_t)cs_type, 0, 502 check_func, (void *)mcpu_mwait) == 0) { 503 if (*mcpu_mwait == MWAIT_HALTED) { 504 i86_mwait(cstate->cs_address, 1); 505 } 506 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 507 } 508 } 509 } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) { 510 uint32_t value; 511 ACPI_TABLE_FADT *gbl_FADT; 512 513 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { 514 if (cpu_idle_enter((uint_t)cs_type, 0, 515 check_func, (void *)mcpu_mwait) == 0) { 516 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { 517 (void) cpu_acpi_read_port( 518 cstate->cs_address, &value, 8); 519 acpica_get_global_FADT(&gbl_FADT); 520 (void) cpu_acpi_read_port( 521 gbl_FADT->XPmTimerBlock.Address, 522 &value, 32); 523 } 524 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); 525 } 526 } 527 } 528 529 /* 530 * The LAPIC timer may have stopped in deep c-state. 531 * Reprogram this CPU's LAPIC here before enabling interrupts. 532 */ 533 (void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT); 534 sti(); 535 536 /* 537 * We're no longer halted 538 */ 539 if (hset_update) { 540 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 541 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 542 } 543 } 544 545 /* 546 * Idle the present CPU, deep c-state is supported 547 */ 548 void 549 cpu_acpi_idle(void) 550 { 551 cpu_t *cp = CPU; 552 cpu_acpi_handle_t handle; 553 cma_c_state_t *cs_data; 554 cpu_acpi_cstate_t *cstates; 555 hrtime_t start, end; 556 int cpu_max_cstates; 557 uint32_t cs_indx; 558 uint16_t cs_type; 559 560 cpupm_mach_state_t *mach_state = 561 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 562 handle = mach_state->ms_acpi_handle; 563 ASSERT(CPU_ACPI_CSTATES(handle) != NULL); 564 565 cs_data = mach_state->ms_cstate.cma_state.cstate; 566 cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 567 ASSERT(cstates != NULL); 568 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 569 if (cpu_max_cstates > CPU_MAX_CSTATES) 570 cpu_max_cstates = CPU_MAX_CSTATES; 571 if (cpu_max_cstates == 1) { /* no ACPI c-state data */ 572 (*non_deep_idle_cpu)(); 573 return; 574 } 575 576 start = gethrtime_unscaled(); 577 578 cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start); 579 580 cs_type = cstates[cs_indx].cs_type; 581 582 switch (cs_type) { 583 default: 584 /* FALLTHROUGH */ 585 case CPU_ACPI_C1: 586 (*non_deep_idle_cpu)(); 587 break; 588 589 case CPU_ACPI_C2: 590 acpi_cpu_cstate(&cstates[cs_indx]); 591 break; 592 593 case CPU_ACPI_C3: 594 /* 595 * All supported Intel processors maintain cache coherency 596 * during C3. Currently when entering C3 processors flush 597 * core caches to higher level shared cache. The shared cache 598 * maintains state and supports probes during C3. 599 * Consequently there is no need to handle cache coherency 600 * and Bus Master activity here with the cache flush, BM_RLD 601 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described 602 * in section 8.1.4 of the ACPI Specification 4.0. 603 */ 604 acpi_cpu_cstate(&cstates[cs_indx]); 605 break; 606 } 607 608 end = gethrtime_unscaled(); 609 610 /* 611 * Update statistics 612 */ 613 cpupm_wakeup_cstate_data(cs_data, end); 614 } 615 616 boolean_t 617 cpu_deep_cstates_supported(void) 618 { 619 extern int idle_cpu_no_deep_c; 620 621 if (idle_cpu_no_deep_c) 622 return (B_FALSE); 623 624 if (!cpuid_deep_cstates_supported()) 625 return (B_FALSE); 626 627 if (cpuid_arat_supported()) { 628 cpu_cstate_arat = B_TRUE; 629 return (B_TRUE); 630 } 631 632 if ((hpet.supported == HPET_FULL_SUPPORT) && 633 hpet.install_proxy()) { 634 cpu_cstate_hpet = B_TRUE; 635 return (B_TRUE); 636 } 637 638 return (B_FALSE); 639 } 640 641 /* 642 * Validate that this processor supports deep cstate and if so, 643 * get the c-state data from ACPI and cache it. 644 */ 645 static int 646 cpu_idle_init(cpu_t *cp) 647 { 648 cpupm_mach_state_t *mach_state = 649 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 650 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 651 cpu_acpi_cstate_t *cstate; 652 char name[KSTAT_STRLEN]; 653 int cpu_max_cstates, i; 654 int ret; 655 656 /* 657 * Cache the C-state specific ACPI data. 658 */ 659 if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) { 660 if (ret < 0) 661 cmn_err(CE_NOTE, 662 "!Support for CPU deep idle states is being " 663 "disabled due to errors parsing ACPI C-state " 664 "objects exported by BIOS."); 665 cpu_idle_fini(cp); 666 return (-1); 667 } 668 669 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 670 671 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 672 673 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 674 (void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type); 675 /* 676 * Allocate, initialize and install cstate kstat 677 */ 678 cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id, 679 name, "misc", 680 KSTAT_TYPE_NAMED, 681 sizeof (cpu_idle_kstat) / sizeof (kstat_named_t), 682 KSTAT_FLAG_VIRTUAL); 683 684 if (cstate->cs_ksp == NULL) { 685 cmn_err(CE_NOTE, "kstat_create(c_state) fail"); 686 } else { 687 cstate->cs_ksp->ks_data = &cpu_idle_kstat; 688 cstate->cs_ksp->ks_lock = &cpu_idle_mutex; 689 cstate->cs_ksp->ks_update = cpu_idle_kstat_update; 690 cstate->cs_ksp->ks_data_size += MAXNAMELEN; 691 cstate->cs_ksp->ks_private = cstate; 692 kstat_install(cstate->cs_ksp); 693 cstate++; 694 } 695 } 696 697 cpupm_alloc_domains(cp, CPUPM_C_STATES); 698 cpupm_alloc_ms_cstate(cp); 699 700 if (cpu_deep_cstates_supported()) { 701 uint32_t value; 702 703 mutex_enter(&cpu_idle_callb_mutex); 704 if (cpu_deep_idle_callb_id == (callb_id_t)0) 705 cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb, 706 (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle"); 707 if (cpu_idle_cpr_callb_id == (callb_id_t)0) 708 cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb, 709 (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr"); 710 mutex_exit(&cpu_idle_callb_mutex); 711 712 713 /* 714 * All supported CPUs (Nehalem and later) will remain in C3 715 * during Bus Master activity. 716 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it 717 * is not already 0 before enabling Deeper C-states. 718 */ 719 cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value); 720 if (value & 1) 721 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 722 } 723 724 return (0); 725 } 726 727 /* 728 * Free resources allocated by cpu_idle_init(). 729 */ 730 static void 731 cpu_idle_fini(cpu_t *cp) 732 { 733 cpupm_mach_state_t *mach_state = 734 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 735 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 736 cpu_acpi_cstate_t *cstate; 737 uint_t cpu_max_cstates, i; 738 739 /* 740 * idle cpu points back to the generic one 741 */ 742 idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 743 disp_enq_thread = non_deep_idle_disp_enq_thread; 744 745 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 746 if (cstate) { 747 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 748 749 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 750 if (cstate->cs_ksp != NULL) 751 kstat_delete(cstate->cs_ksp); 752 cstate++; 753 } 754 } 755 756 cpupm_free_ms_cstate(cp); 757 cpupm_free_domains(&cpupm_cstate_domains); 758 cpu_acpi_free_cstate_data(handle); 759 760 mutex_enter(&cpu_idle_callb_mutex); 761 if (cpu_deep_idle_callb_id != (callb_id_t)0) { 762 (void) callb_delete(cpu_deep_idle_callb_id); 763 cpu_deep_idle_callb_id = (callb_id_t)0; 764 } 765 if (cpu_idle_cpr_callb_id != (callb_id_t)0) { 766 (void) callb_delete(cpu_idle_cpr_callb_id); 767 cpu_idle_cpr_callb_id = (callb_id_t)0; 768 } 769 mutex_exit(&cpu_idle_callb_mutex); 770 } 771 772 /*ARGSUSED*/ 773 static boolean_t 774 cpu_deep_idle_callb(void *arg, int code) 775 { 776 boolean_t rslt = B_TRUE; 777 778 mutex_enter(&cpu_idle_callb_mutex); 779 switch (code) { 780 case PM_DEFAULT_CPU_DEEP_IDLE: 781 /* 782 * Default policy is same as enable 783 */ 784 /*FALLTHROUGH*/ 785 case PM_ENABLE_CPU_DEEP_IDLE: 786 if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0) 787 break; 788 789 if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) { 790 disp_enq_thread = cstate_wakeup; 791 idle_cpu = cpu_idle_adaptive; 792 cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG; 793 } else { 794 rslt = B_FALSE; 795 } 796 break; 797 798 case PM_DISABLE_CPU_DEEP_IDLE: 799 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 800 break; 801 802 idle_cpu = non_deep_idle_cpu; 803 if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) { 804 disp_enq_thread = non_deep_idle_disp_enq_thread; 805 cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG; 806 } 807 break; 808 809 default: 810 cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n", 811 code); 812 break; 813 } 814 mutex_exit(&cpu_idle_callb_mutex); 815 return (rslt); 816 } 817 818 /*ARGSUSED*/ 819 static boolean_t 820 cpu_idle_cpr_callb(void *arg, int code) 821 { 822 boolean_t rslt = B_TRUE; 823 824 mutex_enter(&cpu_idle_callb_mutex); 825 switch (code) { 826 case CB_CODE_CPR_RESUME: 827 if (cstate_timer_callback(CB_CODE_CPR_RESUME)) { 828 /* 829 * Do not enable dispatcher hooks if disabled by user. 830 */ 831 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 832 break; 833 834 disp_enq_thread = cstate_wakeup; 835 idle_cpu = cpu_idle_adaptive; 836 } else { 837 rslt = B_FALSE; 838 } 839 break; 840 841 case CB_CODE_CPR_CHKPT: 842 idle_cpu = non_deep_idle_cpu; 843 disp_enq_thread = non_deep_idle_disp_enq_thread; 844 (void) cstate_timer_callback(CB_CODE_CPR_CHKPT); 845 break; 846 847 default: 848 cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code); 849 break; 850 } 851 mutex_exit(&cpu_idle_callb_mutex); 852 return (rslt); 853 } 854 855 /* 856 * handle _CST notification 857 */ 858 void 859 cpuidle_cstate_instance(cpu_t *cp) 860 { 861 #ifndef __xpv 862 cpupm_mach_state_t *mach_state = 863 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 864 cpu_acpi_handle_t handle; 865 struct machcpu *mcpu; 866 cpuset_t dom_cpu_set; 867 kmutex_t *pm_lock; 868 int result = 0; 869 processorid_t cpu_id; 870 871 if (mach_state == NULL) { 872 return; 873 } 874 875 ASSERT(mach_state->ms_cstate.cma_domain != NULL); 876 dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus; 877 pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock; 878 879 /* 880 * Do for all the CPU's in the domain 881 */ 882 mutex_enter(pm_lock); 883 do { 884 CPUSET_FIND(dom_cpu_set, cpu_id); 885 if (cpu_id == CPUSET_NOTINSET) 886 break; 887 888 ASSERT(cpu_id >= 0 && cpu_id < NCPU); 889 cp = cpu[cpu_id]; 890 mach_state = (cpupm_mach_state_t *) 891 cp->cpu_m.mcpu_pm_mach_state; 892 if (!(mach_state->ms_caps & CPUPM_C_STATES)) { 893 mutex_exit(pm_lock); 894 return; 895 } 896 handle = mach_state->ms_acpi_handle; 897 ASSERT(handle != NULL); 898 899 /* 900 * re-evaluate cstate object 901 */ 902 if (cpu_acpi_cache_cstate_data(handle) != 0) { 903 cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state" 904 " object Instance: %d", cpu_id); 905 } 906 mutex_enter(&cpu_lock); 907 mcpu = &(cp->cpu_m); 908 mcpu->max_cstates = cpu_acpi_get_max_cstates(handle); 909 if (mcpu->max_cstates > CPU_ACPI_C1) { 910 (void) cstate_timer_callback( 911 CST_EVENT_MULTIPLE_CSTATES); 912 disp_enq_thread = cstate_wakeup; 913 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 914 } else if (mcpu->max_cstates == CPU_ACPI_C1) { 915 disp_enq_thread = non_deep_idle_disp_enq_thread; 916 cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 917 (void) cstate_timer_callback(CST_EVENT_ONE_CSTATE); 918 } 919 mutex_exit(&cpu_lock); 920 921 CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result); 922 mutex_exit(pm_lock); 923 } while (result < 0); 924 #endif 925 } 926 927 /* 928 * handle the number or the type of available processor power states change 929 */ 930 void 931 cpuidle_manage_cstates(void *ctx) 932 { 933 cpu_t *cp = ctx; 934 cpupm_mach_state_t *mach_state = 935 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 936 boolean_t is_ready; 937 938 if (mach_state == NULL) { 939 return; 940 } 941 942 /* 943 * We currently refuse to power manage if the CPU is not ready to 944 * take cross calls (cross calls fail silently if CPU is not ready 945 * for it). 946 * 947 * Additionally, for x86 platforms we cannot power manage 948 * any one instance, until all instances have been initialized. 949 * That's because we don't know what the CPU domains look like 950 * until all instances have been initialized. 951 */ 952 is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(); 953 if (!is_ready) 954 return; 955 956 cpuidle_cstate_instance(cp); 957 } 958