/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>

#define	CSTATE_USING_HPET	1
#define	CSTATE_USING_LAT	2

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);

static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);

/*
 * cpu_cstate_arat: the CPU has an always-running local APIC timer (ARAT).
 * cpu_cstate_hpet: the HPET timer is used as a proxy in deep C-states.
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;

/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL
};

static kmutex_t		cpu_idle_callb_mutex;
static callb_id_t	cpu_deep_idle_callb_id;
static callb_id_t	cpu_idle_cpr_callb_id;
static uint_t		cpu_idle_cfg_state;

static kmutex_t cpu_idle_mutex;

cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id",	KSTAT_DATA_STRING },
	{ "latency",		KSTAT_DATA_UINT32 },
	{ "power",		KSTAT_DATA_UINT32 },
};

/*
 * kstat update function for the C-state info.
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;

	return (0);
}

/*
 * Used during configuration callbacks to manage implementation specific
 * details of the hardware timer used during Deep C-state.
 */
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat) {
		return (B_TRUE);
	} else if (cpu_cstate_hpet) {
		return (hpet.callback(code));
	}
	return (B_FALSE);
}

/*
 * Some Local APIC Timers do not work during Deep C-states.
 * The Deep C-state idle function uses this function to ensure it is using a
 * hardware timer that works during Deep C-states.  This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
 */
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	/*
	 * Return B_FALSE if there is neither ARAT nor HPET support.
	 */
	if (!cpu_cstate_hpet)
		return (B_FALSE);

	switch (timer) {
	case CSTATE_USING_HPET:
		return (hpet.use_hpet_timer(lapic_expire));
	case CSTATE_USING_LAT:
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}
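
/*
 * Note on usage: the CSTATE_USING_HPET and CSTATE_USING_LAT requests are
 * paired around the actual halt.  acpi_cpu_cstate() below calls
 * cstate_use_timer(..., CSTATE_USING_HPET) with interrupts disabled before
 * halting, and always issues a matching CSTATE_USING_LAT call to restart
 * the LAPIC timer before re-enabling interrupts.
 */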

/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu *mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t *cpu_part;
	uint_t cpu_found;
	processorid_t cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	/*
	 * Clear the halted bit for that CPU since it will be woken up
	 * in a moment.
	 */
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in the idle
		 * function.  Waking ourself is obviously unnecessary,
		 * since if we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use the correct wakeup mechanism.
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch.  No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;

	/*
	 * See if there are any other halted CPUs.  If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use the correct wakeup mechanism to avoid a lost wakeup of
	 * the alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}
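
/*
 * Summary of the halt/wakeup handshake used below: a CPU announces that it
 * is halting by writing its mcpu_mwait word and adding itself to the
 * partition's cp_haltset; a waker (cstate_wakeup() above) clears the
 * haltset bit first and then either writes mcpu_mwait or sends an IPI.
 * acpi_cpu_cstate() re-checks its haltset bit with interrupts disabled to
 * close the race between the two.
 */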

/*
 * Enter an ACPI deep C-state on the current CPU.
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	hrtime_t lapic_expire;
	uint8_t type = cstate->cs_addrspace_id;
	uint32_t cs_type = cstate->cs_type;
	int hset_update = 1;
	boolean_t using_timer;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait.  No other cpu will
	 * attempt to set our mcpu_mwait until we add ourselves to the
	 * haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO)
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
		else
			*mcpu_mwait = MWAIT_HALTED;
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap.  This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened.  We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows.  On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check.  We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Try to program the HPET hardware to substitute for this CPU's
	 * LAPIC timer.
	 * cstate_use_timer() could disable the LAPIC Timer.  Make sure
	 * to start the LAPIC Timer again before leaving this function.
	 *
	 * Disable interrupts here so we will awaken immediately after halting
	 * if someone tries to poke us between now and the time we actually
	 * halt.
	 */
	cli();
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return.  If the bit is cleared after
	 * we check, then cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering within cstate_wakeup() is important:
	 * it must clear our cp_haltset bit before issuing the wakeup, while
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness.  disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_timer == B_FALSE) {

		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy LAPIC timer.  This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its LAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wakeup() will wake this CPU with an IPI, which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			cpu_dtrace_idle_probe(CPU_ACPI_C1);

			tlb_going_idle();
			i86_mwait(0, 0);
			tlb_service();

			cpu_dtrace_idle_probe(CPU_ACPI_C0);
		}

		/*
		 * We're no longer halted.
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}
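
	/*
	 * At this point a working proxy timer (either ARAT or the HPET) is
	 * in place, so it is safe to enter the requested deep C-state.
	 */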
	cpu_dtrace_idle_probe((uint_t)cs_type);

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			uint32_t eax = cstate->cs_address;
			uint32_t ecx = 1;

			tlb_going_idle();
			i86_mwait(eax, ecx);
			tlb_service();
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			tlb_going_idle();
			(void) cpu_acpi_read_port(cstate->cs_address,
			    &value, 8);
			acpica_get_global_FADT(&gbl_FADT);
			(void) cpu_acpi_read_port(
			    gbl_FADT->XPmTimerBlock.Address, &value, 32);
			tlb_service();
		}
	}

	/*
	 * The LAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's LAPIC here before enabling interrupts.
	 */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();

	cpu_dtrace_idle_probe(CPU_ACPI_C0);

	/*
	 * We're no longer halted.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * Read the bus master status bit (BM_STS), clear it if it was set, and
 * return non-zero if bus masters have been active.
 */
static uint32_t
cpu_acpi_bm_sts(void)
{
	uint32_t bm_sts = 0;

	cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_sts);

	if (bm_sts)
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);

	return (bm_sts);
}

/*
 * Idle the current CPU, using ACPI deep C-states where they are supported.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	/*
	 * OSPM uses the BM_STS bit to determine the power state to enter
	 * when considering a transition to or from the C2/C3 power state.
	 * If C3 is selected, bus master activity demotes the power state
	 * to C2.
	 */
	if ((cstates[cs_indx].cs_type >= CPU_ACPI_C3) && cpu_acpi_bm_sts())
		--cs_indx;
	cs_type = cstates[cs_indx].cs_type;

	/*
	 * BM_RLD determines if the Cx power state was exited as a result of
	 * bus master requests.  Set this bit when using a C3 power state, and
	 * clear it when using a C1 or C2 power state.
	 */
	if ((CPU_ACPI_BM_INFO(handle) & BM_RLD) && (cs_type < CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
		CPU_ACPI_BM_INFO(handle) &= ~BM_RLD;
	}

	if ((!(CPU_ACPI_BM_INFO(handle) & BM_RLD)) &&
	    (cs_type >= CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		CPU_ACPI_BM_INFO(handle) |= BM_RLD;
	}

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * As recommended by the ACPI spec, use the hardware bus
		 * master arbitration control to prevent bus masters from
		 * writing to memory while in C3 (uniprocessor only).
		 */
		if ((ncpus_online == 1) &&
		    (CPU_ACPI_BM_INFO(handle) & BM_CTL)) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			CPU_ACPI_BM_INFO(handle) |= BM_ARB_DIS;
		/*
		 * Today all Intel processors that support C3 share their
		 * caches, so no explicit cache flush is needed for them.
		 */
		} else if (x86_vendor != X86_VENDOR_Intel) {
			__acpi_wbinvd();
		}
		acpi_cpu_cstate(&cstates[cs_indx]);
		if (CPU_ACPI_BM_INFO(handle) & BM_ARB_DIS) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
			CPU_ACPI_BM_INFO(handle) &= ~BM_ARB_DIS;
		}
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics.
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}
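
/*
 * Return B_TRUE if this processor may use deep C-states: deep C-states must
 * not be administratively disabled (idle_cpu_no_deep_c), the CPU must
 * advertise them via cpuid, and there must be a timer that keeps running
 * while the LAPIC timer is stopped, either an always-running APIC timer
 * (ARAT) or a fully supported HPET that can act as a proxy.
 */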
boolean_t
cpu_deep_cstates_supported(void)
{
	extern int idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c)
		return (B_FALSE);

	if (!cpuid_deep_cstates_supported())
		return (B_FALSE);

	if (cpuid_arat_supported()) {
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if ((hpet.supported == HPET_FULL_SUPPORT) &&
	    hpet.install_proxy()) {
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Validate that this processor supports deep cstate and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	ACPI_TABLE_FADT *gbl_FADT;

	/*
	 * Cache the C-state specific ACPI data.
	 */
	if (cpu_acpi_cache_cstate_data(handle) != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_idle_init: Failed to cache ACPI C-state data\n");
		cpu_idle_fini(cp);
		return (-1);
	}

	/*
	 * Check the bus master arbitration control ability: if the FADT
	 * advertises a PM2 control block, note that in BM_CTL.
	 */
	acpica_get_global_FADT(&gbl_FADT);
	if (gbl_FADT->Pm2ControlBlock && gbl_FADT->Pm2ControlLength)
		CPU_ACPI_BM_INFO(handle) |= BM_CTL;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		/*
		 * Allocate, initialize and install the cstate kstat.
		 */
		cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);
	}

	return (0);
}

/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * Point the idle routine back to the generic, non-deep one.
	 */
	idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}
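
/*
 * CPU deep-idle configuration callback: enable or disable the deep C-state
 * dispatcher hooks (cstate_wakeup()/cpu_idle_adaptive()) in response to the
 * PM_*_CPU_DEEP_IDLE requests, tracking the configuration in
 * cpu_idle_cfg_state.
 */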
/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
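
/*
 * CPR (suspend/resume) callback.  On checkpoint, fall back to the non-deep
 * idle routine and notify the timer code; on resume, re-enable the deep
 * C-state dispatcher hooks unless they have been disabled by the user.
 */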
/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*
 * Handle an ACPI _CST change notification.
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef	__xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle;
	struct machcpu *mcpu;
	cpuset_t dom_cpu_set;
	kmutex_t *pm_lock;
	int result = 0;
	processorid_t cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do this for all CPUs in the domain.
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * Re-evaluate the C-state object.
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mutex_enter(&cpu_lock);
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}
		mutex_exit(&cpu_lock);

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
		mutex_exit(pm_lock);
	} while (result < 0);
#endif
}

/*
 * Handle a change in the number or type of the available processor power
 * states.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t is_ready;

	if (mach_state == NULL) {
		return;
	}

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if the CPU is not
	 * ready for them).
	 *
	 * Additionally, for x86 platforms we cannot power manage any one
	 * instance until all instances have been initialized.  That's
	 * because we don't know what the CPU domains look like until all
	 * instances have been initialized.
	 */
	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready();
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}